Exploratory Data Analysis
Overdose Deaths
############################## EDA: Plot the Outcome and Intervention Trends ###############################
#plot the time series of the number of deaths and probability of overdose death
# Read the yearly unintentional-overdose export (tab-separated text file).
# The file ends with free-text dataset-description rows -- presumably a CDC
# WONDER-style footer; those rows have no Year value (TODO confirm source).
od_data_recent <- read.csv("./Data/od_unintentional_yearly_18_and_up_11_28_21.txt",
sep = "\t", stringsAsFactors = FALSE)
# Deaths arrives as character; coerce to numeric (non-numeric entries -> NA
# with a coercion warning)
od_data_recent$Deaths <- as.numeric(od_data_recent$Deaths)
od_data_recent<-od_data_recent[!is.na(od_data_recent$Year),] #drop footer rows that only contain data set description info
# Restrict to the 2000-2019 study window and sum deaths across states per year
od_data_recent<- od_data_recent %>%
filter(Year > 1999 & Year < 2020) %>%
group_by(Year) %>%
summarise(sum_deaths = sum(Deaths, na.rm = TRUE))
# pdf("./Figures/total_od_deaths_all_paper_11_29_21_2000_2019.pdf")
# Line-and-point plot of the national yearly total, minimal theme, Times fonts
ggplot(data = od_data_recent, mapping = aes(x = Year, y = sum_deaths)) +
geom_line() +
geom_point() +
labs(x = "Year", y = "Yearly Number of Unintentional Drug Overdose Deaths in the 50 U.S. States") +
theme(panel.background = element_rect("white"),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
axis.title=element_text(family="Times", size=10, face="bold"),
axis.text=element_text(family="Times",size=10)) +
scale_x_continuous(breaks = seq(2000, 2020, by = 2)) +
# fixed y-range so the figure matches the paper's axis
ylim(c(0, 62000))

# dev.off()
#yearly national totals: deaths, population, and death rates
#population is divided by 2 because each year appears as two 6-month rows,
#so summing it would double-count -- presumably; TODO confirm period structure
main_analysis_data_sum <- main_analysis_data %>%
  group_by(year = year(Time_Period_Start)) %>%
  summarise(
    total_deaths = sum(imputed_deaths),
    sum_pop = sum(population)/2,
    #summarise evaluates sequentially, so the columns just created can be reused
    total_prop = total_deaths / sum_pop,
    total_prop_by_100000 = 100000 * total_prop
  )
# %>%mutate(date = as.Date(as.yearmon(year)))
#compute the percentage difference between 2000 and 2019
death_2000 <- main_analysis_data_sum$total_deaths[main_analysis_data_sum$year == 2000]
death_2019 <- main_analysis_data_sum$total_deaths[main_analysis_data_sum$year == 2019]
100 * (death_2019 - death_2000) / death_2000
## [1] 435.5701
Intervention: DIH Prosecutions
#plot the number of states with an intervention for each time point
#first, create a data set of the distinct first-intervention dates
num_states_with_intervention <- data.frame(
  "Start_Date" = unique(main_analysis_data$Intervention_First_Date[!is.na(main_analysis_data$Intervention_First_Date)]))
#for each date d, count the distinct states whose first intervention occurred
#on or before d (cumulative number of ever-treated states); rows with NA
#intervention dates drop out via the !is.na filter on the matched states.
#vapply replaces the original grow-by-append for loop: preallocated, type-stable
num_states_with_intervention$numStates <- vapply(
  num_states_with_intervention$Start_Date,
  function(d) {
    states_w_int <- unique(main_analysis_data$State[main_analysis_data$Intervention_First_Date <= d])
    length(states_w_int[!is.na(states_w_int)])
  },
  integer(1))
num_states_with_intervention$Start_Date <- as.Date(num_states_with_intervention$Start_Date)
#anchor the series at the study start (0 states) and study end (maximum count)
num_states_with_intervention <- rbind(
  data.frame("Start_Date" = c(as.Date("2000-01-01"), as.Date("2019-12-31")),
             "numStates" = c(0, max(num_states_with_intervention$numStates))),
  num_states_with_intervention)
#add the lagged count, then stack current and lagged counts into long format
#(the second pivot_longer argument names the label column "numStates")
num_states_with_intervention <- num_states_with_intervention %>%
  arrange(Start_Date) %>%
  mutate(lag_numStates = lag(numStates))
num_states_with_intervention <- num_states_with_intervention %>%
  pivot_longer(c("lag_numStates", "numStates"), "numStates")
# pdf("Figures/num_states_with_intervention_11_29_21.pdf")
# step-style plot of the cumulative number of states with DIH prosecutions;
# the long format (current + lagged counts per date) produces the step shape
ggplot(num_states_with_intervention, aes(x = Start_Date, y = value, group = 1)) +
geom_line() +
# geom_point(num_states_with_intervention[num_states_with_intervention$numStates == "numStates",],
# mapping = aes(x = Start_Date, y = value, group = 1), size = 1) +
labs(x = "Year", y = "Cumulative Number of States to have DIH Prosecutions") +
theme(axis.text=element_text(family="Times",size=10),
axis.title=element_text(family="Times", size=10, face="bold"),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
axis.text.x = element_text(family="Times", size=10),
panel.background = element_rect("white")) +
# biennial year labels across the study window
scale_x_date(date_labels="%Y", breaks = seq(as.Date("2000-01-01"), as.Date("2018-01-01"), by = "2 years"))

# dev.off()
Policy Dates
#add the intervention variable as a measure of number of states with DIH prosecution
#(per time period: how many state rows have the intervention indicator set)
main_analysis_data <- main_analysis_data %>%
group_by(Time_Period_Start) %>%
mutate(num_states_w_intervention = sum(Intervention_Redefined))
############################# Look at the policy dates #######################
#one row per state listing the first effective year-month of each policy;
#unique() collapses the repeated per-period values to a single date per state
policy_dates <- main_analysis_data %>%
group_by(State) %>%
summarise(unique(format(Intervention_First_Date, "%Y-%m")),
unique(format(as.Date(Naloxone_Pharmacy_Yes_First_Date), "%Y-%m")),
unique(format(as.Date(Naloxone_Pharmacy_No_First_Date), "%Y-%m")),
unique(format(as.Date(Medical_Marijuana_First_Date), "%Y-%m")),
unique(format(as.Date(Recreational_Marijuana_First_Date), "%Y-%m")),
unique(format(as.Date(PDMP_First_Date), "%Y-%m")),
unique(format(as.Date(GSL_First_Date), "%Y-%m")),
unique(format(as.Date(Medicaid_Expansion_First_Date), "%Y-%m")))
#replace the machine-generated summarise column names with readable labels
#(order must match the summarise expressions above)
names(policy_dates) <- c("State", "DIH Prosecutions", "NAL: Pharmacists Yes",
"NAL: Pharmacists No", "MML", "RML", "PDMP", "GSL",
"Medicaid")
# write.csv(policy_dates, "./Data/policy_dates_11_29_21.csv")
Create Plot of Number of DIH Prosecutions Per State
#create a plot for each state to see how many prosecution media alerts there are per 6 month period
#read in the prosecution media alert data
prosecution_data<-read.csv("./Data/dih_prosecutions_9_6_21.csv")
#data cleaning
prosecution_data<-prosecution_data %>%
mutate(Date = as.Date(Date.charged, "%m/%d/%Y")) %>%
#fix two known inconsistent State.Filed spellings (lowercase / trailing space)
mutate(State = ifelse(State.Filed == "pennsylvania", "Pennsylvania", State.Filed),
State = ifelse(State.Filed == "Virginia ", "Virginia", State)) %>%
#9999 is a temporary sentinel so rows with a missing/non-numeric age survive
#the adult (>= 18) filter below; it is converted back to NA afterwards
mutate(deceased_age = ifelse(!is.na(as.numeric(Deceased.s.Age)), as.numeric(Deceased.s.Age), 9999)) %>%
filter(!is.na(Date),
State.Filed != "No Info",
State.Filed != "No info",
State.Filed != "No Info ",
deceased_age >= 18,
State != "" ) %>%
mutate(deceased_age = ifelse(deceased_age == 9999, NA, deceased_age))
#manual correction of an impossible future charge date, verified against the
#linked article
prosecution_data$Date[prosecution_data$Date == "2026-08-01"] <- as.Date("2016-02-15", "%Y-%m-%d")
#change the states into Character instead of factor
prosecution_data$State<-as.character(prosecution_data$State)
#see how many prosecution data points there are for each state
table(prosecution_data$State)
##
## Alabama Alaska Arizona Arkansas California
## 11 7 7 4 65
## Colorado Connecticut Delaware Florida Georgia
## 29 43 3 133 27
## Idaho Illinois Indiana Iowa Kansas
## 8 330 55 31 6
## Kentucky Louisiana Maine Maryland Massachusetts
## 41 63 17 58 33
## Michigan Minnesota Mississippi Missouri Montana
## 104 135 1 40 10
## Nebraska Nevada New Hampshire New Jersey New Mexico
## 1 11 38 134 4
## New York North Carolina North Dakota Ohio Oklahoma
## 96 121 51 379 39
## Oregon Pennsylvania Rhode Island South Carolina South Dakota
## 16 708 2 11 12
## Tennessee Texas Utah Vermont Virginia
## 94 43 17 12 59
## Washington West Virginia Wisconsin Wyoming
## 63 33 364 19
#there are some repeated cases depending on victim so extract distinct cases
#(one row per accused name + charge date within each state)
prosecution_data_unique <- prosecution_data %>%
  group_by(State) %>%
  #use TRUE, not the reassignable shorthand T
  distinct(Accused.Name, Date, .keep_all = TRUE)
#per-state counts after de-duplication (compare with the table above)
table(prosecution_data_unique$State)
##
## Alabama Alaska Arizona Arkansas California
## 11 7 7 4 61
## Colorado Connecticut Delaware Florida Georgia
## 27 42 3 129 24
## Idaho Illinois Indiana Iowa Kansas
## 8 324 53 31 6
## Kentucky Louisiana Maine Maryland Massachusetts
## 41 63 17 57 33
## Michigan Minnesota Mississippi Missouri Montana
## 102 135 1 39 9
## Nebraska Nevada New Hampshire New Jersey New Mexico
## 1 11 38 129 4
## New York North Carolina North Dakota Ohio Oklahoma
## 91 118 38 372 32
## Oregon Pennsylvania Rhode Island South Carolina South Dakota
## 16 700 2 11 12
## Tennessee Texas Utah Vermont Virginia
## 94 42 17 12 59
## Washington West Virginia Wisconsin Wyoming
## 60 33 357 19
#change date charged into Date object
prosecution_data_unique$Date<-mdy(prosecution_data_unique$Date.charged)
#group the data into six month periods (floor each date to its half-year start)
prosecution_data_unique<-prosecution_data_unique %>%
mutate(six_month_pd = lubridate::floor_date(Date , "6 months" ))
prosecution_grouped <- prosecution_data_unique %>%
#filter to dates after 2000 and dates before 2020
filter(year(six_month_pd) >= 2000 & year(six_month_pd) <= 2019) %>%
group_by(State, six_month_pd) %>%
#for each state, for each six month period, count the number of DIH prosecutions
summarise(num_dih = n()) %>%
#NOTE: summarise only produces rows for observed state-periods, so num_dih is
#always >= 1 here and the "zero" branch below can only apply if zero-count
#periods are added later (hence the author's "#ONLY IF GROUPS" caveat)
#ONLY IF GROUPS
#label the groups according to zero, low, or high
mutate(group = ifelse(num_dih == 0, "zero", ifelse(num_dih >= 5, "high", "low"))) %>%
ungroup() %>%
#have to add in a row for hawaii because its not in the prosecution dataset
add_row(State = "Hawaii", six_month_pd = as.Date("2000-01-01"), num_dih = 0, group = "zero")
#look at table of ages of the victims
table(as.numeric(prosecution_data_unique$Deceased.s.Age))
##
## 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
## 88 107 88 155 132 150 142 130 152 122 152 110 148 92 96 100 84 97 46 56
## 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
## 54 53 61 47 27 37 21 29 23 38 24 29 27 18 14 15 17 15 11 10
## 58 59 60 61 62 63 64 66 67 68 69 75 85 87
## 10 22 4 4 7 2 2 2 1 1 3 1 1 1
#same age distribution as proportions of the non-missing ages
table(as.numeric(prosecution_data_unique$Deceased.s.Age))/sum(!is.na(as.numeric(prosecution_data_unique$Deceased.s.Age)))
##
## 18 19 20 21 22 23
## 0.0305767894 0.0371785962 0.0305767894 0.0538568450 0.0458651842 0.0521195274
## 24 25 26 27 28 29
## 0.0493398193 0.0451702571 0.0528144545 0.0423905490 0.0528144545 0.0382209868
## 30 31 32 33 34 35
## 0.0514246004 0.0319666435 0.0333564976 0.0347463516 0.0291869354 0.0337039611
## 36 37 38 39 40 41
## 0.0159833218 0.0194579569 0.0187630299 0.0184155664 0.0211952745 0.0163307853
## 42 43 44 45 46 47
## 0.0093815149 0.0128561501 0.0072967338 0.0100764420 0.0079916609 0.0132036136
## 48 49 50 51 52 53
## 0.0083391244 0.0100764420 0.0093815149 0.0062543433 0.0048644892 0.0052119527
## 54 55 56 57 58 59
## 0.0059068798 0.0052119527 0.0038220987 0.0034746352 0.0034746352 0.0076441974
## 60 61 62 63 64 66
## 0.0013898541 0.0013898541 0.0024322446 0.0006949270 0.0006949270 0.0006949270
## 67 68 69 75 85 87
## 0.0003474635 0.0003474635 0.0010423905 0.0003474635 0.0003474635 0.0003474635
#view table of people in each age category, based on prosecution data
#NA ages propagate through the nested ifelse and stay NA
prosecution_data_unique <- prosecution_data_unique %>%
mutate(Deceased.s.Age = as.numeric(Deceased.s.Age),
age_groups = ifelse(Deceased.s.Age <= 17, "0_17",
ifelse(Deceased.s.Age >= 18 & Deceased.s.Age <= 34, "18_34",
ifelse(Deceased.s.Age >= 35 & Deceased.s.Age <= 54, "35_54",
ifelse(Deceased.s.Age >= 55 & Deceased.s.Age <= 64, "55_64", "65_plus")))))
#NOTE(review): exclude = "NA" excludes the literal string "NA", not true NA
#values -- the output still shows an <NA> column; confirm whether NA was
#meant to be dropped (exclude = NA) or shown (useNA = "ifany")
table(prosecution_data_unique$age_groups, exclude = "NA")
##
## 18_34 35_54 55_64 65_plus <NA>
## 2048 733 87 10 624
#we compute the final group for each state by seeing if it ever hits high or low
#if it hits a higher level, then it will remain defined in that higher level
prosecution_grouped_final <- prosecution_grouped %>%
  group_by(State) %>%
  summarise(final_gp = ifelse(sum(group == "high") > 0, "high", ifelse(sum(group == "low")> 0, "low", "zero")))
#plot of the number of states in each zero/low/high category
#after_stat(count) replaces the deprecated ..count.. dot-dot notation
ggplot(prosecution_grouped_final, aes(final_gp)) +
  geom_bar() +
  labs(title = "Number of States by DIH prosecution Category, with Low = [1,5]") +
  geom_text(aes(label = after_stat(count)), stat = "count", vjust = -.75)

#number of DIH prosecutions per six month for each state
# pdf("Figures/num_dih_per_six_month_pd_by_state_11_29_21.pdf")
#bar chart per state (one facet each) of counts per 6-month period
ggplot(prosecution_grouped, aes(x = six_month_pd, y = num_dih)) +
geom_bar(stat = "identity") +
facet_wrap(~State) +
labs(y = "Number of DIH Prosecutions Reported in the Media",
x = "Date") +
#small angled Times labels so 50 facets stay legible
theme(axis.text.x = element_text(hjust = 1, size = 6, family = "Times", angle = 30),
axis.text.y = element_text(size = 6, family = "Times"),
axis.title = element_text(size = 10, face = "bold", family = "Times"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.background = element_blank(),
strip.text = element_text(size=8),
panel.background = element_rect("white"),
legend.position = "bottom")

# dev.off()
# write.csv(prosecution_grouped, "./Data/num_dih_per_six_month_pd_by_state_11_12_21.csv")
Main Analysis: Effect of At Least One DIH Prosecution Report in Media on Unintentional Overdose Deaths
Analysis
############################## Run Model with Spline Time Effects by Region ###############################
#model that we will be using for the main analysis
#binomial GAM: the response is (deaths, survivors) per state-period, so the
#model fits the probability of unintentional overdose death
#cr is used for cubic regression spline -- we are smoothing time effects by region
#covariates: state fixed effects, region-specific smooth time trends, the
#co-occurring policy indicators, the DIH intervention indicator, and the
#number of states with the intervention (spillover measure)
#run the analysis for all the states
main_analysis_model<-gam(cbind(round(imputed_deaths), round(num_alive))~ State +
s(Time_Period_ID, bs = "cr", by = as.factor(Region)) +
Naloxone_Pharmacy_Yes_Redefined +
Naloxone_Pharmacy_No_Redefined +
Medical_Marijuana_Redefined +
Recreational_Marijuana_Redefined +
GSL_Redefined +
PDMP_Redefined +
Medicaid_Expansion_Redefined +
Intervention_Redefined +
num_states_w_intervention,
data = main_analysis_data, family = "binomial")
#summary output of the model (HTML regression table)
stargazer(main_analysis_model, type = "html", dep.var.labels = c("Unintentional Overdose Deaths"))
|
|
|
|
Dependent variable:
|
|
|
|
|
|
Unintentional Overdose Deaths
|
|
|
|
StateAlaska
|
0.277***
|
|
|
(0.028)
|
|
|
|
|
StateArizona
|
0.315***
|
|
|
(0.014)
|
|
|
|
|
StateArkansas
|
-0.390***
|
|
|
(0.020)
|
|
|
|
|
StateCalifornia
|
-0.159***
|
|
|
(0.013)
|
|
|
|
|
StateColorado
|
0.096***
|
|
|
(0.016)
|
|
|
|
|
StateConnecticut
|
0.189***
|
|
|
(0.016)
|
|
|
|
|
StateDelaware
|
0.430***
|
|
|
(0.022)
|
|
|
|
|
StateFlorida
|
0.253***
|
|
|
(0.012)
|
|
|
|
|
StateGeorgia
|
-0.071***
|
|
|
(0.013)
|
|
|
|
|
StateHawaii
|
-0.226***
|
|
|
(0.026)
|
|
|
|
|
StateIdaho
|
-0.136***
|
|
|
(0.024)
|
|
|
|
|
StateIllinois
|
-0.016
|
|
|
(0.013)
|
|
|
|
|
StateIndiana
|
0.088***
|
|
|
(0.014)
|
|
|
|
|
StateIowa
|
-0.740***
|
|
|
(0.021)
|
|
|
|
|
StateKansas
|
-0.328***
|
|
|
(0.019)
|
|
|
|
|
StateKentucky
|
0.641***
|
|
|
(0.014)
|
|
|
|
|
StateLouisiana
|
0.294***
|
|
|
(0.014)
|
|
|
|
|
StateMaine
|
0.145***
|
|
|
(0.022)
|
|
|
|
|
StateMaryland
|
-1.067***
|
|
|
(0.019)
|
|
|
|
|
StateMassachusetts
|
0.208***
|
|
|
(0.014)
|
|
|
|
|
StateMichigan
|
-0.019
|
|
|
(0.014)
|
|
|
|
|
StateMinnesota
|
-0.618***
|
|
|
(0.017)
|
|
|
|
|
StateMississippi
|
-0.100***
|
|
|
(0.018)
|
|
|
|
|
StateMissouri
|
0.194***
|
|
|
(0.015)
|
|
|
|
|
StateMontana
|
-0.359***
|
|
|
(0.029)
|
|
|
|
|
StateNebraska
|
-0.883***
|
|
|
(0.029)
|
|
|
|
|
StateNevada
|
0.439***
|
|
|
(0.017)
|
|
|
|
|
StateNew Hampshire
|
0.257***
|
|
|
(0.020)
|
|
|
|
|
StateNew Jersey
|
0.107***
|
|
|
(0.013)
|
|
|
|
|
StateNew Mexico
|
0.631***
|
|
|
(0.017)
|
|
|
|
|
StateNew York
|
-0.238***
|
|
|
(0.013)
|
|
|
|
|
StateNorth Carolina
|
0.177***
|
|
|
(0.013)
|
|
|
|
|
StateNorth Dakota
|
-1.053***
|
|
|
(0.045)
|
|
|
|
|
StateOhio
|
0.455***
|
|
|
(0.012)
|
|
|
|
|
StateOklahoma
|
0.385***
|
|
|
(0.015)
|
|
|
|
|
StateOregon
|
-0.194***
|
|
|
(0.018)
|
|
|
|
|
StatePennsylvania
|
0.436***
|
|
|
(0.012)
|
|
|
|
|
StateRhode Island
|
0.238***
|
|
|
(0.022)
|
|
|
|
|
StateSouth Carolina
|
0.221***
|
|
|
(0.015)
|
|
|
|
|
StateSouth Dakota
|
-0.960***
|
|
|
(0.043)
|
|
|
|
|
StateTennessee
|
0.436***
|
|
|
(0.013)
|
|
|
|
|
StateTexas
|
-0.204***
|
|
|
(0.012)
|
|
|
|
|
StateUtah
|
0.072***
|
|
|
(0.018)
|
|
|
|
|
StateVermont
|
-0.167***
|
|
|
(0.031)
|
|
|
|
|
StateVirginia
|
-0.112***
|
|
|
(0.014)
|
|
|
|
|
StateWashington
|
0.078***
|
|
|
(0.015)
|
|
|
|
|
StateWest Virginia
|
0.876***
|
|
|
(0.015)
|
|
|
|
|
StateWisconsin
|
-0.037**
|
|
|
(0.015)
|
|
|
|
|
StateWyoming
|
0.022
|
|
|
(0.034)
|
|
|
|
|
Naloxone_Pharmacy_Yes_Redefined
|
-0.024***
|
|
|
(0.008)
|
|
|
|
|
Naloxone_Pharmacy_No_Redefined
|
0.008
|
|
|
(0.007)
|
|
|
|
|
Medical_Marijuana_Redefined
|
0.063***
|
|
|
(0.006)
|
|
|
|
|
Recreational_Marijuana_Redefined
|
-0.037***
|
|
|
(0.009)
|
|
|
|
|
GSL_Redefined
|
0.034***
|
|
|
(0.006)
|
|
|
|
|
PDMP_Redefined
|
-0.020***
|
|
|
(0.006)
|
|
|
|
|
Medicaid_Expansion_Redefined
|
0.099***
|
|
|
(0.006)
|
|
|
|
|
Intervention_Redefined
|
0.062***
|
|
|
(0.005)
|
|
|
|
|
num_states_w_intervention
|
0.004**
|
|
|
(0.002)
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.9
|
|
|
|
|
|
|
|
|
Constant
|
-9.911***
|
|
|
(0.054)
|
|
|
|
|
|
|
Observations
|
2,000
|
|
Adjusted R2
|
0.911
|
|
Log Likelihood
|
-16,751.790
|
|
UBRE
|
8.841
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
Sandwich Estimator
#here, we estimate the variance-covariance matrix through the sandwich estimator
#we create a function so that we don't have to keep writing the code:
#cov_data is such that rows are state-time combinations and columns are the different policy measures
#coef_values need to be in order of the columns of cov_data
#z_value is the z-value that corresponds to the CI. We default to 95% CI so we default to 1.96
compute_sd_and_CI <- function(cov_data, population, observed_od, predicted_prob_od, coef_values, z_value = 1.96){
  #Sandwich (robust) variance estimator and Wald CIs for the policy coefficients.
  #
  #cov_data: data.frame; rows = state-time combinations, columns = policy measures
  #population: population n_{s,t} per row (same row order as cov_data)
  #observed_od: observed number of overdose deaths per row
  #predicted_prob_od: model-predicted probability of overdose death per row
  #coef_values: coefficient estimates, in the column order of cov_data
  #z_value: critical value for the CI; default 1.96 gives a 95% CI
  #Returns a data.frame with CI bounds and estimates on the coefficient and
  #odds-ratio (exponentiated) scales, plus the robust standard deviations.
  X <- as.matrix(cov_data)
  #predicted number of OD deaths per row
  pred_num_od <- population * predicted_prob_od
  #squared residual on the count scale
  obs_minus_pred_num_od_sq <- (observed_od - pred_num_od)^2
  #"bread": sum_{s,t} n_{s,t} * p_{s,t}(1-p_{s,t}) * z_{s,t} z_{s,t}^T.
  #One weighted cross-product replaces the original row-by-row loop of 1x1
  #matrix products -- same result, done in compiled matrix code
  w <- population * predicted_prob_od * (1 - predicted_prob_od)
  constant_term <- t(X) %*% (w * X)
  #"meat": sum_{s,t} z_{s,t} * (obs - pred)^2 * z_{s,t}^T
  middle_term <- t(X) %*% (obs_minus_pred_num_od_sq * X)
  #sandwich: bread^{-1} %*% meat %*% t(bread^{-1}); invert the bread once
  bread_inv <- solve(constant_term)
  var_cov <- bread_inv %*% middle_term %*% t(bread_inv)
  #standard deviations are the square roots of the diagonal
  sd_of_coefficients <- sqrt(diag(var_cov))
  #Wald-type confidence interval for the coefficients
  lb_coef <- coef_values - z_value*(sd_of_coefficients)
  ub_coef <- coef_values + z_value*(sd_of_coefficients)
  data.frame(lb_coef, coef_values, ub_coef,
             exp_lb = exp(lb_coef), exp_coef = exp(coef_values),
             exp_ub = exp(ub_coef), sd_coef = sd_of_coefficients)
}
#we only want to estimate the variances for the policies, intervention variable, and number of states with intervention
subset_cov <- main_analysis_data %>%
ungroup() %>%
dplyr::select(
Naloxone_Pharmacy_Yes_Redefined,
Naloxone_Pharmacy_No_Redefined,
Medical_Marijuana_Redefined,
Recreational_Marijuana_Redefined,
GSL_Redefined,
PDMP_Redefined,
Medicaid_Expansion_Redefined,
Intervention_Redefined,
num_states_w_intervention)
#estimate the 95% CI and SD
#NOTE: tail(..., ncol(subset_cov)) assumes these covariates are the LAST
#parametric coefficients of the gam and in the same order as the columns of
#subset_cov -- this matches the model formula above, but would silently break
#if the formula or the select() order changed
coefficient_values <- tail(summary(main_analysis_model)$p.coeff, ncol(subset_cov))
#fitted probabilities on the response scale, one per state-period row
pred_prob <- predict(main_analysis_model, newdata = main_analysis_data, type = "response")
main_analysis_sd_and_ci <- compute_sd_and_CI(subset_cov, main_analysis_data$population, main_analysis_data$imputed_deaths,
pred_prob, coefficient_values)
main_analysis_sd_and_ci
## lb_coef coef_values ub_coef
## Naloxone_Pharmacy_Yes_Redefined -0.067872997 -0.024109299 0.019654398
## Naloxone_Pharmacy_No_Redefined -0.021034248 0.008403335 0.037840919
## Medical_Marijuana_Redefined 0.041340673 0.062983026 0.084625379
## Recreational_Marijuana_Redefined -0.069389910 -0.036775616 -0.004161323
## GSL_Redefined -0.006809296 0.033853246 0.074515788
## PDMP_Redefined -0.050139563 -0.019514080 0.011111403
## Medicaid_Expansion_Redefined 0.071006665 0.099106282 0.127205899
## Intervention_Redefined 0.036522147 0.061825922 0.087129696
## num_states_w_intervention 0.002706159 0.003692466 0.004678773
## exp_lb exp_coef exp_ub sd_coef
## Naloxone_Pharmacy_Yes_Redefined 0.9343791 0.9761790 1.0198488 0.022328417
## Naloxone_Pharmacy_No_Redefined 0.9791854 1.0084387 1.0385660 0.015019175
## Medical_Marijuana_Redefined 1.0422071 1.0650088 1.0883093 0.011042017
## Recreational_Marijuana_Redefined 0.9329628 0.9638924 0.9958473 0.016639946
## GSL_Redefined 0.9932138 1.0344328 1.0773624 0.020746195
## PDMP_Redefined 0.9510967 0.9806751 1.0111734 0.015625246
## Medicaid_Expansion_Redefined 1.0735884 1.1041836 1.1356508 0.014336539
## Intervention_Redefined 1.0371973 1.0637771 1.0910382 0.012910089
## num_states_w_intervention 1.0027098 1.0036993 1.0046897 0.000503218
Plots
#check diagnostics: convergence info, basis-dimension (k) adequacy, and
#residual diagnostic plots for the fitted gam
gam.check(main_analysis_model)

##
## Method: UBRE Optimizer: outer newton
## full convergence after 7 iterations.
## Gradient range [5.064278e-08,2.529474e-05]
## (score 8.840989 & scale 1).
## Hessian positive definite, eigenvalue range [8.477283e-05,0.0004428833].
## Model rank = 95 / 95
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(Time_Period_ID):as.factor(Region)Midwest 9.00 8.89 1.05 1.00
## s(Time_Period_ID):as.factor(Region)Northeast 9.00 8.96 1.05 0.99
## s(Time_Period_ID):as.factor(Region)South 9.00 8.93 1.05 0.98
## s(Time_Period_ID):as.factor(Region)West 9.00 8.22 1.05 0.98
#helper: smoothed-time-effect plot for one region's spline term; replaces four
#near-identical copy-pasted plot chunks
#model_viz: mgcViz object from getViz()
#smooth_index: index of the smooth term (1 = Midwest, 2 = Northeast, 3 = South, 4 = West)
#region_name: region label used in the y-axis title
plot_region_smooth <- function(model_viz, smooth_index, region_name) {
  plot(sm(model_viz, smooth_index)) +
    l_fitLine() +
    l_ciLine(mul = 5, linetype = 2) +
    theme_classic() +
    labs(x = "Time Period",
         y = paste("Smoothed Time Effects for", region_name)) +
    #time periods are indexed from 2000, so breaks 1, 11, 21, 31 label
    #2000, 2005, 2010, 2015
    scale_x_continuous(breaks = c(1, 11, 21, 31),
                       labels = c("2000", "2005", "2010", "2015")) +
    theme(text = element_text(family = "Times", size = 10),
          title = element_text(family = "Times", size = 10, face = "bold"),
          panel.background = element_rect("white")) +
    #shared y-limits so the four regions are directly comparable
    ylim(c(-1, 1.2))
}
main_analysis_model_object <- getViz(main_analysis_model)
midwest_plot <- plot_region_smooth(main_analysis_model_object, 1, "Midwest")
northeast_plot <- plot_region_smooth(main_analysis_model_object, 2, "Northeast")
south_plot <- plot_region_smooth(main_analysis_model_object, 3, "South")
west_plot <- plot_region_smooth(main_analysis_model_object, 4, "West")
# pdf("./Figures/time_smoothed_effects_9_6_21.pdf")
#2x2 grid of the four regional smooths
gridPrint(midwest_plot, northeast_plot, south_plot, west_plot, ncol = 2)

# dev.off()
#yearly national population (one unique population value per state-year)
total_pop <- main_analysis_data %>%
group_by(year = year(Time_Period_Start), State) %>%
summarise(pop = unique(population)) %>%
group_by(year) %>%
summarise(sum(pop))
#deaths per 100,000 people per year
#NOTE: the mutate divides row-wise against total_pop and relies on both
#summaries being sorted by the same year order
main_analysis_data %>%
group_by(year(Time_Period_Start)) %>%
summarise(sum_deaths = sum(imputed_deaths)*100000) %>%
mutate(sum_deaths/total_pop$`sum(pop)`)
## # A tibble: 20 x 3
## `year(Time_Period_Start)` sum_deaths `sum_deaths/total_pop$\`sum(pop)\``
## * <dbl> <dbl> <dbl>
## 1 2000 1151390000 5.52
## 2 2001 1276465000 6.03
## 3 2002 1614890000 7.54
## 4 2003 1799140000. 8.31
## 5 2004 1953250000 8.92
## 6 2005 2216225000 10.0
## 7 2006 2603525000 11.6
## 8 2007 2730800000 12.0
## 9 2008 2787500000 12.1
## 10 2009 2848100000 12.3
## 11 2010 2969200000 12.7
## 12 2011 3276800000 13.9
## 13 2012 3291600000 13.8
## 14 2013 3541000000 14.7
## 15 2014 3847600000 15.8
## 16 2015 4381900000 17.9
## 17 2016 5433100000 21.9
## 18 2017 6084700000 24.4
## 19 2018 5847900000 23.2
## 20 2019 6166500000 24.3
#per-state change in yearly death rate between the first and last study years;
#keep only the states with the smallest and largest change
main_analysis_data %>%
group_by(State, year(Time_Period_Start)) %>%
summarise(death_rate = (sum(imputed_deaths)/unique(population))*100000) %>%
group_by(State) %>%
#death_rate[1] / death_rate[20] assume every state has exactly 20 years
#(2000-2019) in year order after the grouped summarise
summarise(first_death_rate = death_rate[1],
last_death_rate = death_rate[20]) %>%
mutate(range_death_rate = last_death_rate - first_death_rate) %>%
filter(range_death_rate == min(range_death_rate) | range_death_rate == max(range_death_rate))
## # A tibble: 2 x 4
## State first_death_rate last_death_rate range_death_rate
## <chr> <dbl> <dbl> <dbl>
## 1 Nebraska 1.67 8.40 6.74
## 2 West Virginia 3.98 57.6 53.6
# #summarize the DIH dates
# main_analysis_data %>%
# group_by(Time_Period_Start) %>%
# summarise(prop_w_intervention = mean(Intervention_Redefined > 0)) %>%
# View()
#create a data frame comparing fitted vs observed overdose-death probabilities
#as a visual goodness-of-fit check.
#rows align one-to-one with main_analysis_data, so the gam's fitted values
#(same row order) and the observed proportions can be assigned directly --
#this replaces the original per-state loop, which filled in exactly the same
#values index-by-index
main_analysis_plot_table <- data.frame(
  State = main_analysis_data$State,
  #model-fitted probability of overdose death per state-period row
  Fitted = fitted(main_analysis_model),
  #observed proportion: imputed deaths over population
  Observed = main_analysis_data$imputed_deaths / main_analysis_data$population,
  Time = main_analysis_data$Time_Period_ID,
  Time_Date = main_analysis_data$Time_Period_Start,
  Intervention_Date = main_analysis_data$Intervention_First_Date)
#plot to compare the fitted values vs observed deaths
# pdf("./Figures/GAM_fitted_vs_actual_by_Region_9_6_21_with_int_date_full_data.pdf")
#one facet per state; both series rescaled to rates per 100,000 people, with a
#vertical line at each state's first intervention date
ggplot(data = main_analysis_plot_table, aes(x = Time_Date, y = Observed*100000, group = 1,
color = "Observed")) +
geom_line(aes(color = "Observed"))+ geom_point(aes(color = "Observed"), size = .5, alpha = .5) +
geom_line(data = main_analysis_plot_table, aes(x = Time_Date, y = Fitted*100000, group = 1,
color = "Estimate")) +
geom_point(data = main_analysis_plot_table, aes(x = Time_Date, y = Fitted*100000,
color = "Estimate"),
size = .5, alpha = .5) +
#"Estimate" maps to pink, "Observed" to blue (alphabetical legend order)
scale_color_manual(values = c("pink", "blue")) +
geom_vline(main_analysis_plot_table, mapping = aes(xintercept = Intervention_Date)) +
facet_wrap(facets = vars(State), scales = "free_y", ncol = 5) +
theme(axis.text.x = element_text(hjust = 1, size = 6, family = "Times"),
axis.text.y = element_text(size = 6, family = "Times"),
axis.title=element_text(size = 10,face = "bold", family = "Times"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.background = element_blank(),
strip.text = element_text(size=8),
panel.background = element_rect("white"),
legend.position = "bottom") +
labs(x = "Year", y = "Unintentional Drug Overdose Death Rates per 100,000 People",
color = "")

# dev.off()
#diagnostic plot: deviance residuals against the logit of the fitted values
residTab <- data.frame(
  logit_fitted_vals = logit(fitted(main_analysis_model)),
  resids = residuals(main_analysis_model)
)
# pdf("./Figures/deviance_resids_9_6_21.pdf")
ggplot(residTab, aes(x = logit_fitted_vals, y = resids)) +
  geom_point() +
  labs(x = "Logistic Function of Fitted Values", y = "Deviance Residuals") +
  # theme_classic() +
  theme(text = element_text(size = 10, family = "Times"),
        title = element_text(size = 10, family = "Times", face = "bold"),
        panel.background = element_rect(fill = "white", color = "black"))

# dev.off()
#binned residual plot: link-scale predictions vs response-scale residuals
pred_vals <- predict(main_analysis_model)
resids <- resid(main_analysis_model, type = "response")
# pdf("./Figures/binned_resids_plot_9_6_21.pdf")
#bold Times axis labels to match the other figures (one combined par call)
par(font.lab = 2, family = "Times")
binnedplot(pred_vals, resids, main = "",
           xlab = "Average Logistic Function of Fitted Values",
           ylab = "Average Residuals")

# dev.off()
Compile Results
############################## Main Analysis: Make Data Frame of Results and 95% CI ###############################
#store the coefficients into the table (coef() returns every coefficient:
#state effects, policy covariates, and the spline basis coefficients)
main_analysis_full_table<-data.frame(coef(main_analysis_model))
#check to see how the table looks
head(main_analysis_full_table)
## coef.main_analysis_model.
## (Intercept) -9.91068390
## StateAlaska 0.27712443
## StateArizona 0.31458505
## StateArkansas -0.39014795
## StateCalifornia -0.15908174
## StateColorado 0.09639187
#rename the column to "Coefficient_Estimate"
colnames(main_analysis_full_table)<-c("Coefficient_Estimate")
#vector of covariates
covariates<-c("Naloxone_Pharmacy_Yes_Redefined",
"Naloxone_Pharmacy_No_Redefined",
"Medical_Marijuana_Redefined",
"Recreational_Marijuana_Redefined",
"GSL_Redefined",
"PDMP_Redefined",
"Medicaid_Expansion_Redefined",
"Intervention_Redefined",
"num_states_w_intervention")
#rename the variable names of the regression output so that they look nicer:
#currently there are 3 types of coefficients: state effects, the covariates,
#and smoothed time effects; covariate rows are left untouched.
#(seq_along() replaces 1:length(), which misbehaves on an empty table, and
#startsWith() replaces the original's error-prone hard-coded substr() prefix
#widths, e.g. substr(x, 1, 35))
smooth_prefix<-"s(Time_Period_ID):as.factor(Region)"
for(i in seq_along(rownames(main_analysis_full_table))){
  row_name<-rownames(main_analysis_full_table)[i]
  if(!(row_name %in% covariates)){
    if(startsWith(row_name, "State")){
      #state effects look like "StateMassachusetts" or "StateGeorgia":
      #drop the leading "State" so the row is just the state name
      rownames(main_analysis_full_table)[i]<-substr(row_name, start = 6,
                                                    stop = nchar(row_name))
    }else if(row_name == "(Intercept)"){
      #the intercept corresponds to the baseline state, Alabama
      rownames(main_analysis_full_table)[i]<-"Intercept/Alabama"
    }else if(startsWith(row_name, smooth_prefix)){
      #smoothed time effects look like "s(Time_Period_ID):as.factor(Region)West":
      #strip the spline prefix and use a readable label instead
      rownames(main_analysis_full_table)[i]<-paste0("Smoothed Time for Region ",
                                                    substr(row_name,
                                                           start = nchar(smooth_prefix) + 1,
                                                           stop = nchar(row_name)))
    }
  }
}
#confidence intervals for the coefficients
#Wald 95% CI: estimate +/- 1.96 * standard error
#(summary(...)$se is assumed to be in the same order as coef() -- TODO confirm
#against mgcv's summary.gam documentation)
main_analysis_full_table$Coefficient_Lower_Bound<-main_analysis_full_table$Coefficient_Estimate -
1.96*summary(main_analysis_model)$se
main_analysis_full_table$Coefficient_Upper_Bound<-main_analysis_full_table$Coefficient_Estimate +
1.96*summary(main_analysis_model)$se
#impute the estimates and confidence intervals in the odds ratio scale
#(exponentiate the logit-scale estimate and its CI bounds)
main_analysis_full_table$Odds_Ratio<-exp(main_analysis_full_table$Coefficient_Estimate)
main_analysis_full_table$Odds_Ratio_LB<-exp(main_analysis_full_table$Coefficient_Lower_Bound)
main_analysis_full_table$Odds_Ratio_UB<-exp(main_analysis_full_table$Coefficient_Upper_Bound)
#store the standard error and p-value
main_analysis_full_table$Standard_Error<-summary(main_analysis_model)$se
#note that there is no p-value for the smoothed time effects, so we put a NA for those rows
#(p.pv covers only the parametric terms; the padding assumes parametric rows
#come first in coef() -- TODO confirm for mgcv)
main_analysis_full_table$p_value<-c(summary(main_analysis_model)$p.pv,
rep(NA, length(coef(main_analysis_model)) -
length(summary(main_analysis_model)$p.pv)))
head(main_analysis_full_table)
## Coefficient_Estimate Coefficient_Lower_Bound
## Intercept/Alabama -9.91068390 -10.0166188
## Alaska 0.27712443 0.2230357
## Arizona 0.31458505 0.2872618
## Arkansas -0.39014795 -0.4286650
## California -0.15908174 -0.1850903
## Colorado 0.09639187 0.0649615
## Coefficient_Upper_Bound Odds_Ratio Odds_Ratio_LB
## Intercept/Alabama -9.8047490 4.964147e-05 4.465167e-05
## Alaska 0.3312131 1.319331e+00 1.249865e+00
## Arizona 0.3419083 1.369691e+00 1.332773e+00
## Arkansas -0.3516309 6.769567e-01 6.513781e-01
## California -0.1330732 8.529266e-01 8.310292e-01
## Colorado 0.1278222 1.101191e+00 1.067118e+00
## Odds_Ratio_UB Standard_Error p_value
## Intercept/Alabama 5.518888e-05 0.05404841 0.000000e+00
## Alaska 1.392657e+00 0.02759627 9.953454e-24
## Arizona 1.407631e+00 0.01394044 9.275181e-113
## Arkansas 7.035397e-01 0.01965153 1.031808e-87
## California 8.754010e-01 0.01326969 4.089372e-33
## Colorado 1.136351e+00 0.01603591 1.843791e-09
#inspect the last rows (spline coefficients of the West region's smooth)
tail(main_analysis_full_table)
## Coefficient_Estimate Coefficient_Lower_Bound
## Smoothed Time for Region West.4 0.2352866 0.2060029
## Smoothed Time for Region West.5 0.1835194 0.1298088
## Smoothed Time for Region West.6 0.1609894 0.0862950
## Smoothed Time for Region West.7 0.2035462 0.1160603
## Smoothed Time for Region West.8 0.3083070 0.2015524
## Smoothed Time for Region West.9 0.4223217 0.3307914
## Coefficient_Upper_Bound Odds_Ratio
## Smoothed Time for Region West.4 0.2645704 1.265271
## Smoothed Time for Region West.5 0.2372300 1.201438
## Smoothed Time for Region West.6 0.2356839 1.174673
## Smoothed Time for Region West.7 0.2910320 1.225742
## Smoothed Time for Region West.8 0.4150616 1.361119
## Smoothed Time for Region West.9 0.5138520 1.525499
## Odds_Ratio_LB Odds_Ratio_UB Standard_Error
## Smoothed Time for Region West.4 1.228757 1.302871 0.01494068
## Smoothed Time for Region West.5 1.138611 1.267733 0.02740337
## Smoothed Time for Region West.6 1.090128 1.265774 0.03810940
## Smoothed Time for Region West.7 1.123064 1.337807 0.04463562
## Smoothed Time for Region West.8 1.223300 1.514464 0.05446662
## Smoothed Time for Region West.9 1.392069 1.671718 0.04669916
## p_value
## Smoothed Time for Region West.4 NA
## Smoothed Time for Region West.5 NA
## Smoothed Time for Region West.6 NA
## Smoothed Time for Region West.7 NA
## Smoothed Time for Region West.8 NA
## Smoothed Time for Region West.9 NA
#save the full table into a CSV (kept commented out; enable to regenerate)
# write.csv(round(main_analysis_full_table,5), "./Data/coefficients_GAM_9_6_21_full_data_uninentional_od.csv")
#export a table with just the covariates
#first, find the rows that contain the covariates
covariate_Index<-which(rownames(main_analysis_full_table) %in% covariates)
#covariate rows are rounded to 5 decimals here; the state/smooth rows
#appended below stay unrounded (matching the printed output)
main_analysis_covariate_table<-(round(main_analysis_full_table[covariate_Index,], 5))
#rename the variables so that it looks cleaner
rownames(main_analysis_covariate_table)<-c("Naloxone_Pharmacy_Yes",
"Naloxone_Pharmacy_No",
"Medical_Marijuana",
"Recreational_Marijuana",
"GSL",
"PDMP",
"Medicaid_Expansion",
"Intervention",
"Number of States with DIH Prosecutions")
#now, reorganize the data so that the covariates are on top and the rest of the variables are below
#(assumes covariate_Index is non-empty; the covariates are always in the model)
main_analysis_covariate_table<-rbind(main_analysis_covariate_table,
main_analysis_full_table[-covariate_Index,])
#keep only the odds-ratio-scale columns and the p-value.
#setdiff() replaces the original -which(...) negative indexing: with an empty
#which() result, x[, -integer(0)] would silently drop EVERY column
main_analysis_covariate_table<-main_analysis_covariate_table[, setdiff(colnames(main_analysis_covariate_table),
c("Coefficient_Estimate", "Coefficient_Lower_Bound",
"Coefficient_Upper_Bound", "Standard_Error"))]
#NOTE(review): these are odds ratios relabeled as risk ratios -- presumably
#relying on the rare-outcome approximation; confirm with the authors
colnames(main_analysis_covariate_table)<-c("Risk_Ratio_Estimates", "RR_95_CI_LB", "RR_95_CI_UB", "p-value")
head(main_analysis_covariate_table, 10)
## Risk_Ratio_Estimates RR_95_CI_LB
## Naloxone_Pharmacy_Yes 9.761800e-01 9.614500e-01
## Naloxone_Pharmacy_No 1.008440e+00 9.956600e-01
## Medical_Marijuana 1.065010e+00 1.053270e+00
## Recreational_Marijuana 9.638900e-01 9.477300e-01
## GSL 1.034430e+00 1.023090e+00
## PDMP 9.806800e-01 9.695800e-01
## Medicaid_Expansion 1.104180e+00 1.091150e+00
## Intervention 1.063780e+00 1.052640e+00
## Number of States with DIH Prosecutions 1.003700e+00 1.000080e+00
## Intercept/Alabama 4.964147e-05 4.465167e-05
## RR_95_CI_UB p-value
## Naloxone_Pharmacy_Yes 9.911300e-01 0.00188
## Naloxone_Pharmacy_No 1.021380e+00 0.19663
## Medical_Marijuana 1.076870e+00 0.00000
## Recreational_Marijuana 9.803300e-01 0.00002
## GSL 1.045900e+00 0.00000
## PDMP 9.919000e-01 0.00078
## Medicaid_Expansion 1.117380e+00 0.00000
## Intervention 1.075040e+00 0.00000
## Number of States with DIH Prosecutions 1.007330e+00 0.04512
## Intercept/Alabama 5.518888e-05 0.00000
#save the table into a CSV
# write.csv(round(main_analysis_covariate_table, 3), "./Data/coefficients_covariates_9_6_21_full_data_unintentional_od.csv")
Attributable Deaths
############################## Main Analysis: Number of Overdose Deaths Attributed to Intervention ###############################
#find the number of deaths attributable to the intervention
#first, we subset the data so that we only focus on the time points for which at least one state had the intervention
attr_deaths_anlys_main_analysis<-main_analysis_data[which(main_analysis_data$Intervention_Redefined>0),]
#compute the probability of overdose had intervention not occurred:
#take the OBSERVED log-odds (deaths/population, not fitted values), subtract
#the intervention coefficient's contribution, and map back with expit()
prob_od_no_int_main_analysis<-expit(-coef(main_analysis_model)["Intervention_Redefined"]*attr_deaths_anlys_main_analysis$Intervention_Redefined
+ logit(attr_deaths_anlys_main_analysis$imputed_deaths/attr_deaths_anlys_main_analysis$population))
#compute the lower and upper bounds of 95% CI of probability of overdose had intervention not occurred
#here, we compute the lower and upper bounds of the 95% CI of all the coefficients using the standard error from the model
coef_lb<-coef(main_analysis_model) - 1.96*summary(main_analysis_model)$se
coef_ub<-coef(main_analysis_model) + 1.96*summary(main_analysis_model)$se
#we then calculate the upper and lower bounds of the probability of overdose death had intervention not occurred by using
#the lower and upper bounds of the coefficient of the intervention variable
#(note: the "_LB" probability uses coef_lb and is the LARGER counterfactual
#probability, which yields the LOWER bound of attributable deaths below)
prob_od_no_int_LB_main_analysis<-expit(-coef_lb[names(coef_lb) == "Intervention_Redefined"]*attr_deaths_anlys_main_analysis$Intervention_Redefined
+ logit(attr_deaths_anlys_main_analysis$imputed_deaths/attr_deaths_anlys_main_analysis$population))
prob_od_no_int_UB_main_analysis<-expit(-coef_ub[names(coef_ub) == "Intervention_Redefined"]*attr_deaths_anlys_main_analysis$Intervention_Redefined
+ logit(attr_deaths_anlys_main_analysis$imputed_deaths/attr_deaths_anlys_main_analysis$population))
#estimate the number of deaths attributable to the intervention
#first, initialize the vectors to store the numbers (preallocated, one slot
#per unique time period)
num_attr_od_UB<-num_attr_od_LB<-num_attr_od<-rep(NA, length(unique(attr_deaths_anlys_main_analysis$Time_Period_ID)))
#for each time period, we first find the indices of rows containing data from that time point
#then, we find the total number of deaths that attributable to the intervention
index<-1 #keep track of where to store the values in the vector
#iterating over SORTED unique time IDs so results line up with the sorted
#Time_Period_ID column of the summary data frame built afterwards
for(time in sort(unique(attr_deaths_anlys_main_analysis$Time_Period_ID))){
#find the indices of rows where the time point = time
time_point_index<-which(attr_deaths_anlys_main_analysis$Time_Period_ID == time)
#find the number of deaths attributable to intervention = observed number of deaths with intervention - estimated number of deaths had intervention not occurred
num_attr_od[index]<-sum(attr_deaths_anlys_main_analysis$imputed_deaths[time_point_index]
- prob_od_no_int_main_analysis[time_point_index]*attr_deaths_anlys_main_analysis$population[time_point_index])
#find the lower and upper bounds of the estimated number of deaths attributable to the intervention
num_attr_od_LB[index]<-sum(attr_deaths_anlys_main_analysis$imputed_deaths[time_point_index]
- prob_od_no_int_LB_main_analysis[time_point_index]*attr_deaths_anlys_main_analysis$population[time_point_index])
num_attr_od_UB[index]<-sum(attr_deaths_anlys_main_analysis$imputed_deaths[time_point_index]
- prob_od_no_int_UB_main_analysis[time_point_index]*attr_deaths_anlys_main_analysis$population[time_point_index])
index<-index + 1
}
#assemble per-time-period attributable deaths with their CI bounds.
#NOTE(review): pairing sort(unique(Time_Period_ID)) with
#sort(unique(Time_Period_Start)) assumes date order matches ID order -- confirm
num_attr_od_main_analysis<-data.frame("Time_Period_ID" = sort(unique(attr_deaths_anlys_main_analysis$Time_Period_ID)),
"Time_Start" = sort(unique(attr_deaths_anlys_main_analysis$Time_Period_Start)),
"Num_Attr_Deaths" = num_attr_od,
"Num_Attr_Deaths_LB" = num_attr_od_LB,
"Num_Attr_Deaths_UB" = num_attr_od_UB)
#sum up the total number of excess deaths attributable to the intervention
sum(num_attr_od_main_analysis$Num_Attr_Deaths)
## [1] 33114.98
summary(num_attr_od_main_analysis$Num_Attr_Deaths)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9.971 355.713 702.014 827.874 1154.626 1931.467
#convert to Date class for the yearly grouping below
num_attr_od_main_analysis$Time_Start<-as.Date(num_attr_od_main_analysis$Time_Start)
#compute the 95% CI for the total
sum(num_attr_od_main_analysis$Num_Attr_Deaths_LB)
## [1] 27619.4
sum(num_attr_od_main_analysis$Num_Attr_Deaths_UB)
## [1] 38553.26
#sum up the number of excess deaths per year
yearly_num_Attr_Deaths_main_analysis<-num_attr_od_main_analysis %>%
group_by("year" = year(Time_Start)) %>%
summarise("deaths" = sum(Num_Attr_Deaths), death_lb = sum(Num_Attr_Deaths_LB),
death_ub = sum(Num_Attr_Deaths_UB))
summary(yearly_num_Attr_Deaths_main_analysis$deaths)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 29.82 753.33 1409.54 1655.75 2306.53 3684.37
summary(yearly_num_Attr_Deaths_main_analysis$death_lb)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 24.85 628.27 1175.62 1380.97 1923.79 3073.00
summary(yearly_num_Attr_Deaths_main_analysis$death_ub)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 34.74 877.10 1641.02 1927.66 2685.28 4289.35
Secondary Analysis: Effect of At Least One DIH Prosecution Report in Media on All Overdose Deaths
Analysis
############################## Secondary Analysis: All Overdose Deaths ###############################
#load the all-overdose (not just unintentional) data set
od_all_data <- read.csv("./Data/full_data_set_9_7_21_all_od.csv")
#rename the deaths column to match the variable name used in the main analysis
colnames(od_all_data)[which(colnames(od_all_data) == "sum_deaths")] <- "imputed_deaths"
#parse the date columns (read.csv leaves them as strings)
od_all_data$Time_Period_Start <- as.Date(od_all_data$Time_Period_Start)
od_all_data$Intervention_First_Date <- as.Date(od_all_data$Intervention_First_Date)
#assign each state's census region; any state not matched below keeps the
#default "West" (region.list is presumably a list of state-name vectors by
#region, defined earlier in the file)
od_all_data$Region<-rep("West", nrow(od_all_data))
#vectorized replacement for the per-state loop: assignments are applied in
#reverse priority order (South, then Midwest, then Northeast) so that the
#final precedence matches the original if/else chain, where Northeast won
#over Midwest, which won over South
od_all_data$Region[od_all_data$State %in% region.list$South]<-"South"
od_all_data$Region[od_all_data$State %in% region.list$Midwest]<-"Midwest"
od_all_data$Region[od_all_data$State %in% region.list$Northeast]<-"Northeast"
#compute the number of states with intervention by adding up the intervention variable
#(within each time period; Intervention_Redefined is presumably a 0/1
#indicator so the sum is a state count -- confirm upstream).
#note: od_all_data remains grouped by Time_Period_Start after mutate()
od_all_data <- od_all_data %>%
group_by(Time_Period_Start) %>%
mutate(num_states_w_intervention = sum(Intervention_Redefined))
#model that we will be using for the main analysis
#cr is used for cubic regression spline -- we are smoothing time effects by region
#run the analysis for all the states
#binomial GAM with cbind(successes, failures): deaths vs survivors (num_alive),
#rounded because the counts were imputed and may be non-integer
od_all_model<-gam(cbind(round(imputed_deaths), round(num_alive))~ State +
s(Time_Period_ID, bs = "cr", by = as.factor(Region)) +
Naloxone_Pharmacy_Yes_Redefined +
Naloxone_Pharmacy_No_Redefined +
Medical_Marijuana_Redefined +
Recreational_Marijuana_Redefined +
GSL_Redefined +
PDMP_Redefined +
Medicaid_Expansion_Redefined +
Intervention_Redefined +
num_states_w_intervention,
data = od_all_data, family = "binomial")
#summary output of the model
stargazer(od_all_model, type = "html", dep.var.labels = "All Overdose Deaths")
|
|
|
|
Dependent variable:
|
|
|
|
|
|
All Overdose Deaths
|
|
|
|
StateAlaska
|
0.223***
|
|
|
(0.024)
|
|
|
|
|
StateArizona
|
0.321***
|
|
|
(0.012)
|
|
|
|
|
StateArkansas
|
-0.123***
|
|
|
(0.016)
|
|
|
|
|
StateCalifornia
|
-0.143***
|
|
|
(0.011)
|
|
|
|
|
StateColorado
|
0.163***
|
|
|
(0.014)
|
|
|
|
|
StateConnecticut
|
0.134***
|
|
|
(0.014)
|
|
|
|
|
StateDelaware
|
0.367***
|
|
|
(0.020)
|
|
|
|
|
StateFlorida
|
0.256***
|
|
|
(0.010)
|
|
|
|
|
StateGeorgia
|
-0.123***
|
|
|
(0.012)
|
|
|
|
|
StateHawaii
|
-0.107***
|
|
|
(0.021)
|
|
|
|
|
StateIdaho
|
-0.028
|
|
|
(0.019)
|
|
|
|
|
StateIllinois
|
-0.091***
|
|
|
(0.011)
|
|
|
|
|
StateIndiana
|
0.135***
|
|
|
(0.012)
|
|
|
|
|
StateIowa
|
-0.590***
|
|
|
(0.018)
|
|
|
|
|
StateKansas
|
-0.205***
|
|
|
(0.016)
|
|
|
|
|
StateKentucky
|
0.519***
|
|
|
(0.012)
|
|
|
|
|
StateLouisiana
|
0.271***
|
|
|
(0.012)
|
|
|
|
|
StateMaine
|
0.152***
|
|
|
(0.019)
|
|
|
|
|
StateMaryland
|
0.371***
|
|
|
(0.012)
|
|
|
|
|
StateMassachusetts
|
0.307***
|
|
|
(0.012)
|
|
|
|
|
StateMichigan
|
0.259***
|
|
|
(0.011)
|
|
|
|
|
StateMinnesota
|
-0.455***
|
|
|
(0.015)
|
|
|
|
|
StateMississippi
|
-0.145***
|
|
|
(0.016)
|
|
|
|
|
StateMissouri
|
0.201***
|
|
|
(0.013)
|
|
|
|
|
StateMontana
|
-0.020
|
|
|
(0.023)
|
|
|
|
|
StateNebraska
|
-0.630***
|
|
|
(0.023)
|
|
|
|
|
StateNevada
|
0.426***
|
|
|
(0.014)
|
|
|
|
|
StateNew Hampshire
|
0.257***
|
|
|
(0.018)
|
|
|
|
|
StateNew Jersey
|
0.046***
|
|
|
(0.012)
|
|
|
|
|
StateNew Mexico
|
0.561***
|
|
|
(0.015)
|
|
|
|
|
StateNew York
|
-0.205***
|
|
|
(0.011)
|
|
|
|
|
StateNorth Carolina
|
0.125***
|
|
|
(0.011)
|
|
|
|
|
StateNorth Dakota
|
-0.875***
|
|
|
(0.037)
|
|
|
|
|
StateOhio
|
0.374***
|
|
|
(0.011)
|
|
|
|
|
StateOklahoma
|
0.307***
|
|
|
(0.013)
|
|
|
|
|
StateOregon
|
0.109***
|
|
|
(0.014)
|
|
|
|
|
StatePennsylvania
|
0.364***
|
|
|
(0.011)
|
|
|
|
|
StateRhode Island
|
0.319***
|
|
|
(0.019)
|
|
|
|
|
StateSouth Carolina
|
0.126***
|
|
|
(0.013)
|
|
|
|
|
StateSouth Dakota
|
-0.694***
|
|
|
(0.033)
|
|
|
|
|
StateTennessee
|
0.396***
|
|
|
(0.011)
|
|
|
|
|
StateTexas
|
-0.249***
|
|
|
(0.011)
|
|
|
|
|
StateUtah
|
0.457***
|
|
|
(0.014)
|
|
|
|
|
StateVermont
|
-0.021
|
|
|
(0.026)
|
|
|
|
|
StateVirginia
|
-0.146***
|
|
|
(0.012)
|
|
|
|
|
StateWashington
|
0.131***
|
|
|
(0.013)
|
|
|
|
|
StateWest Virginia
|
0.770***
|
|
|
(0.014)
|
|
|
|
|
StateWisconsin
|
-0.016
|
|
|
(0.013)
|
|
|
|
|
StateWyoming
|
0.061**
|
|
|
(0.028)
|
|
|
|
|
Naloxone_Pharmacy_Yes_Redefined
|
0.009
|
|
|
(0.007)
|
|
|
|
|
Naloxone_Pharmacy_No_Redefined
|
0.005
|
|
|
(0.006)
|
|
|
|
|
Medical_Marijuana_Redefined
|
0.041***
|
|
|
(0.005)
|
|
|
|
|
Recreational_Marijuana_Redefined
|
-0.065***
|
|
|
(0.008)
|
|
|
|
|
GSL_Redefined
|
0.011**
|
|
|
(0.005)
|
|
|
|
|
PDMP_Redefined
|
0.016***
|
|
|
(0.005)
|
|
|
|
|
Medicaid_Expansion_Redefined
|
0.112***
|
|
|
(0.005)
|
|
|
|
|
Intervention_Redefined
|
0.020***
|
|
|
(0.005)
|
|
|
|
|
num_states_w_intervention
|
0.014***
|
|
|
(0.002)
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.9
|
|
|
|
|
|
|
|
|
Constant
|
-10.737***
|
|
|
(0.060)
|
|
|
|
|
|
|
Observations
|
2,000
|
|
Adjusted R2
|
0.900
|
|
Log Likelihood
|
-15,832.420
|
|
UBRE
|
7.507
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
# gam.check(od_all_model)
Plots
#create a data frame comparing fitted vs observed overdose death probabilities
#for the all-overdose model (goodness-of-fit visual check).
#the rows of od_all_plot_table are in the same order as od_all_data, and
#fitted(od_all_model) is aligned with the model's data rows, so values can be
#assigned directly -- the original per-state loop was unnecessary and also
#recomputed fitted(od_all_model) on every iteration.
#(assumes no rows were dropped during fitting; the original loop made the
#same alignment assumption -- TODO confirm)
od_all_plot_table<-data.frame(State = od_all_data$State)
#unname() mirrors the original's indexed assignment, which dropped names
od_all_plot_table$Fitted<-unname(fitted(od_all_model))
#observed probability of overdose death = deaths / population
od_all_plot_table$Observed<-od_all_data$imputed_deaths/od_all_data$population
od_all_plot_table$Time<-od_all_data$Time_Period_ID
od_all_plot_table$Time_Date<-od_all_data$Time_Period_Start
od_all_plot_table$Intervention_Date<-od_all_data$Intervention_First_Date
#plot to compare the fitted values vs observed deaths
#(both rescaled to rates per 100,000; vertical line marks each state's first
#intervention date; one facet per state)
ggplot(data = od_all_plot_table) +
geom_point(aes(x = Time_Date, y = Observed*100000, group = 1, color = "Observed"), size = 0.5, alpha = 0.5) +
geom_line(aes(x = Time_Date, y = Observed*100000, group = 1, color = "Observed"))+
geom_point(data = od_all_plot_table, aes(x = Time_Date, y = Fitted*100000, group = 1, col = "Estimate"), size = 0.5, alpha = 0.5) +
geom_line(data = od_all_plot_table, aes(x = Time_Date, y = Fitted*100000, group = 1, col = "Estimate")) +
geom_vline(od_all_plot_table, mapping = aes(xintercept = Intervention_Date)) +
facet_wrap(facets = vars(State)) +
scale_color_manual(values = c("pink", "blue")) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7), axis.text.y = element_text(size = 7),
axis.title=element_text(size = 15,face = "bold"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.background = element_blank(),
strip.text = element_text(size=8),panel.background = element_rect("white"),
legend.position = "bottom") +
labs(x = "Year", y = "Drug Overdose Death Rates per 100,000 People", color = "")

#plot to show the time smooth effects by region (mgcv plot.gam, all on one page)
plot(od_all_model, ylab = "Smoothed Time Effects", xlab = "Time", pages = 1)

#diagnostic plots to check model (convergence, basis-dimension checks)
gam.check(od_all_model)

##
## Method: UBRE Optimizer: outer newton
## full convergence after 8 iterations.
## Gradient range [-6.991359e-08,1.968549e-06]
## (score 7.507021 & scale 1).
## Hessian positive definite, eigenvalue range [3.584438e-05,0.000246841].
## Model rank = 95 / 95
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(Time_Period_ID):as.factor(Region)Midwest 9.00 8.86 1.04 0.98
## s(Time_Period_ID):as.factor(Region)Northeast 9.00 8.97 1.04 0.96
## s(Time_Period_ID):as.factor(Region)South 9.00 8.89 1.04 0.95
## s(Time_Period_ID):as.factor(Region)West 9.00 8.70 1.04 0.95
#base-graphics deviance-residual diagnostic for the all-overdose model
plot(logit(fitted(od_all_model)), residuals(od_all_model),
xlab = "Logit(Fitted Values)", ylab = "Deviance Residuals" )

Compile Results
############################## Make Data Frame of Results and 95% CI ###############################
#store the coefficients into the table
#(same layout as the main-analysis table: intercept, state effects,
#covariates, spline basis coefficients)
od_all_full_table<-data.frame(coef(od_all_model))
#check to see how the table looks
head(od_all_full_table)
## coef.od_all_model.
## (Intercept) -10.7371006
## StateAlaska 0.2226814
## StateArizona 0.3208871
## StateArkansas -0.1230679
## StateCalifornia -0.1431041
## StateColorado 0.1629195
#rename the column to "Coefficient_Estimate"
colnames(od_all_full_table)<-c("Coefficient_Estimate")
#vector of covariates
#(identical to the main-analysis covariate list; reassigned here so this
#chunk is self-contained)
covariates<-c("Naloxone_Pharmacy_Yes_Redefined",
"Naloxone_Pharmacy_No_Redefined",
"Medical_Marijuana_Redefined",
"Recreational_Marijuana_Redefined",
"GSL_Redefined",
"PDMP_Redefined",
"Medicaid_Expansion_Redefined",
"Intervention_Redefined",
"num_states_w_intervention")
#rename the variable names of the regression output so that they look nicer:
#currently there are 3 types of coefficients: state effects, the covariates,
#and smoothed time effects; covariate rows are left untouched.
#(seq_along() replaces 1:length(), which misbehaves on an empty table, and
#startsWith() replaces the original's error-prone hard-coded substr() prefix
#widths, e.g. substr(x, 1, 35))
smooth_prefix<-"s(Time_Period_ID):as.factor(Region)"
for(i in seq_along(rownames(od_all_full_table))){
  row_name<-rownames(od_all_full_table)[i]
  if(!(row_name %in% covariates)){
    if(startsWith(row_name, "State")){
      #state effects look like "StateMassachusetts" or "StateGeorgia":
      #drop the leading "State" so the row is just the state name
      rownames(od_all_full_table)[i]<-substr(row_name, start = 6,
                                             stop = nchar(row_name))
    }else if(row_name == "(Intercept)"){
      #the intercept corresponds to the baseline state, Alabama
      rownames(od_all_full_table)[i]<-"Intercept/Alabama"
    }else if(startsWith(row_name, smooth_prefix)){
      #smoothed time effects look like "s(Time_Period_ID):as.factor(Region)West":
      #strip the spline prefix and use a readable label instead
      rownames(od_all_full_table)[i]<-paste0("Smoothed Time for Region ",
                                             substr(row_name,
                                                    start = nchar(smooth_prefix) + 1,
                                                    stop = nchar(row_name)))
    }
  }
}
#confidence intervals for the coefficients
#Wald 95% CI: estimate +/- 1.96 * standard error
#(summary(...)$se assumed to be in the same order as coef() -- TODO confirm)
od_all_full_table$Coefficient_Lower_Bound<-od_all_full_table$Coefficient_Estimate - 1.96*summary(od_all_model)$se
od_all_full_table$Coefficient_Upper_Bound<-od_all_full_table$Coefficient_Estimate + 1.96*summary(od_all_model)$se
#impute the estimates and confidence intervals in the odds ratio scale
od_all_full_table$Odds_Ratio<-exp(od_all_full_table$Coefficient_Estimate)
od_all_full_table$Odds_Ratio_LB<-exp(od_all_full_table$Coefficient_Lower_Bound)
od_all_full_table$Odds_Ratio_UB<-exp(od_all_full_table$Coefficient_Upper_Bound)
#store the standard error and p-value
od_all_full_table$Standard_Error<-summary(od_all_model)$se
#note that there is no p-value for the smoothed time effects, so we put a NA for those rows
#(p.pv covers only the parametric terms, assumed to come first in coef())
od_all_full_table$p_value<-c(summary(od_all_model)$p.pv,
rep(NA, length(coef(od_all_model)) -
length(summary(od_all_model)$p.pv)))
head(od_all_full_table)
## Coefficient_Estimate Coefficient_Lower_Bound
## Intercept/Alabama -10.7371006 -10.8556442
## Alaska 0.2226814 0.1749174
## Arizona 0.3208871 0.2973683
## Arkansas -0.1230679 -0.1537843
## California -0.1431041 -0.1655486
## Colorado 0.1629195 0.1359987
## Coefficient_Upper_Bound Odds_Ratio Odds_Ratio_LB
## Intercept/Alabama -10.61855696 2.172383e-05 1.929539e-05
## Alaska 0.27044533 1.249422e+00 1.191148e+00
## Arizona 0.34440588 1.378350e+00 1.346311e+00
## Arkansas -0.09235162 8.842036e-01 8.574570e-01
## California -0.12065954 8.666638e-01 8.474286e-01
## Colorado 0.18984025 1.176942e+00 1.145680e+00
## Odds_Ratio_UB Standard_Error p_value
## Intercept/Alabama 2.445791e-05 0.06048143 0.000000e+00
## Alaska 1.310548e+00 0.02436937 6.376260e-20
## Arizona 1.411151e+00 0.01199937 1.530388e-157
## Arkansas 9.117845e-01 0.01567159 4.064260e-15
## California 8.863357e-01 0.01145130 7.776242e-36
## Colorado 1.209056e+00 0.01373510 1.874870e-32
#inspect the last rows (spline coefficients of the West region's smooth)
tail(od_all_full_table)
## Coefficient_Estimate Coefficient_Lower_Bound
## Smoothed Time for Region West.4 0.123135742 0.09293469
## Smoothed Time for Region West.5 -0.008299928 -0.06586925
## Smoothed Time for Region West.6 -0.085185636 -0.16641537
## Smoothed Time for Region West.7 -0.120884114 -0.21682692
## Smoothed Time for Region West.8 -0.091545818 -0.20901973
## Smoothed Time for Region West.9 0.044134671 -0.05495089
## Coefficient_Upper_Bound Odds_Ratio
## Smoothed Time for Region West.4 0.153336799 1.1310379
## Smoothed Time for Region West.5 0.049269391 0.9917344
## Smoothed Time for Region West.6 -0.003955896 0.9183418
## Smoothed Time for Region West.7 -0.024941308 0.8861366
## Smoothed Time for Region West.8 0.025928091 0.9125195
## Smoothed Time for Region West.9 0.143220234 1.0451231
## Odds_Ratio_LB Odds_Ratio_UB Standard_Error
## Smoothed Time for Region West.4 1.0973901 1.1657175 0.01540870
## Smoothed Time for Region West.5 0.9362533 1.0505033 0.02937210
## Smoothed Time for Region West.6 0.8466945 0.9960519 0.04144374
## Smoothed Time for Region West.7 0.8050693 0.9753672 0.04895041
## Smoothed Time for Region West.8 0.8113792 1.0262671 0.05993567
## Smoothed Time for Region West.9 0.9465316 1.1539839 0.05055386
## p_value
## Smoothed Time for Region West.4 NA
## Smoothed Time for Region West.5 NA
## Smoothed Time for Region West.6 NA
## Smoothed Time for Region West.7 NA
## Smoothed Time for Region West.8 NA
## Smoothed Time for Region West.9 NA
#save the full table into a CSV (kept commented out; enable to regenerate)
# write.csv(round(od_all_full_table,5), "./Data/coefficients_GAM_9_1_20_full_data_all_od.csv")
#export a table with just the covariates
#first, find the rows that contain the covariates
covariate_Index<-which(rownames(od_all_full_table) %in% covariates)
#covariate rows rounded to 5 decimals; appended rows below stay unrounded
od_all_covariate_table<-(round(od_all_full_table[covariate_Index,], 5))
#rename the variables so that it looks cleaner
#NOTE(review): the main-analysis table labels the last row
#"Number of States with DIH Prosecutions" (plural); kept singular here to
#match the printed output below, but consider unifying the two labels
rownames(od_all_covariate_table)<-c("Naloxone_Pharmacy_Yes",
"Naloxone_Pharmacy_No",
"Medical_Marijuana",
"Recreational_Marijuana",
"GSL",
"PDMP",
"Medicaid_Expansion",
"Intervention",
"Number of States with DIH Prosecution")
#now, reorganize the data so that the covariates are on top and the rest of the variables are below
od_all_covariate_table<-rbind(od_all_covariate_table, od_all_full_table[-covariate_Index,])
#keep only the odds-ratio-scale columns and the p-value.
#setdiff() replaces the original -which(...) negative indexing: with an empty
#which() result, x[, -integer(0)] would silently drop EVERY column
od_all_covariate_table<-od_all_covariate_table[, setdiff(colnames(od_all_covariate_table),
c("Coefficient_Estimate", "Coefficient_Lower_Bound", "Coefficient_Upper_Bound", "Standard_Error"))]
colnames(od_all_covariate_table)<-c("Risk_Ratio_Estimates", "RR_95_CI_LB", "RR_95_CI_UB", "p-value")
head(od_all_covariate_table, 11)
## Risk_Ratio_Estimates RR_95_CI_LB
## Naloxone_Pharmacy_Yes 1.008540e+00 9.950000e-01
## Naloxone_Pharmacy_No 1.004760e+00 9.937600e-01
## Medical_Marijuana 1.042010e+00 1.032090e+00
## Recreational_Marijuana 9.371300e-01 9.234400e-01
## GSL 1.011500e+00 1.001890e+00
## PDMP 1.015620e+00 1.006110e+00
## Medicaid_Expansion 1.118740e+00 1.107440e+00
## Intervention 1.019820e+00 1.010750e+00
## Number of States with DIH Prosecution 1.013820e+00 1.009740e+00
## Intercept/Alabama 2.172383e-05 1.929539e-05
## Alaska 1.249422e+00 1.191148e+00
## RR_95_CI_UB p-value
## Naloxone_Pharmacy_Yes 1.022250e+00 2.17470e-01
## Naloxone_Pharmacy_No 1.015880e+00 3.97430e-01
## Medical_Marijuana 1.052020e+00 0.00000e+00
## Recreational_Marijuana 9.510200e-01 0.00000e+00
## GSL 1.021190e+00 1.88600e-02
## PDMP 1.025220e+00 1.24000e-03
## Medicaid_Expansion 1.130140e+00 0.00000e+00
## Intervention 1.028970e+00 2.00000e-05
## Number of States with DIH Prosecution 1.017910e+00 0.00000e+00
## Intercept/Alabama 2.445791e-05 0.00000e+00
## Alaska 1.310548e+00 6.37626e-20
#save the table into a CSV
# write.csv(round(od_all_covariate_table, 5), "./Data/coefficients_covariates_9_6_21_full_data_all_od.csv")
Attributable Deaths
############################## Number of Overdose Deaths Attributed to Intervention ###############################
#find the number of deaths attributable to the intervention
#first, we subset the data so that we only focus on the time points for which at least one state had the intervention
attr_deaths_anlys_od_all<-od_all_data[which(od_all_data$Intervention_Redefined>0),]
#compute the probability of overdose had intervention not occurred:
#observed log-odds (deaths/population) minus the intervention coefficient's
#contribution, mapped back to a probability with expit()
prob_od_no_int_od_all<-expit(-coef(od_all_model)["Intervention_Redefined"]*attr_deaths_anlys_od_all$Intervention_Redefined
+ logit(attr_deaths_anlys_od_all$imputed_deaths/attr_deaths_anlys_od_all$population))
#compute the lower and upper bounds of 95% CI of probability of overdose had intervention not occurred
#here, we compute the lower and upper bounds of the 95% CI of all the coefficients using the standard error from the model
coef_lb<-coef(od_all_model) - 1.96*summary(od_all_model)$se
coef_ub<-coef(od_all_model) + 1.96*summary(od_all_model)$se
#we then calculate the upper and lower bounds of the probability of overdose death had intervention not occurred by using
#the lower and upper bounds of the coefficient of the intervention variable
#(as in the main analysis, "_LB" yields the lower bound of attributable deaths)
prob_od_no_int_LB_od_all<-expit(-coef_lb[names(coef_lb) == "Intervention_Redefined"]*attr_deaths_anlys_od_all$Intervention_Redefined
+ logit(attr_deaths_anlys_od_all$imputed_deaths/attr_deaths_anlys_od_all$population))
prob_od_no_int_UB_od_all<-expit(-coef_ub[names(coef_ub) == "Intervention_Redefined"]*attr_deaths_anlys_od_all$Intervention_Redefined
+ logit(attr_deaths_anlys_od_all$imputed_deaths/attr_deaths_anlys_od_all$population))
#estimate the number of deaths attributable to the intervention
#first, initialize the vectors to store the numbers (one slot per time period)
num_attr_od_UB<-num_attr_od_LB<-num_attr_od<-rep(NA, length(unique(attr_deaths_anlys_od_all$Time_Period_ID)))
#for each time period, we first find the indices of rows containing data from that time point
#then, we find the total number of deaths that attributable to the intervention
index<-1 #keep track of where to store the values in the vector
for(time in sort(unique(attr_deaths_anlys_od_all$Time_Period_ID))){
#find the indices of rows where the time point = time
time_point_index<-which(attr_deaths_anlys_od_all$Time_Period_ID == time)
#find the number of deaths attributable to intervention = observed number of deaths with intervention - estimated number of deaths had intervention not occurred
num_attr_od[index]<-sum(attr_deaths_anlys_od_all$imputed_deaths[time_point_index]
- prob_od_no_int_od_all[time_point_index]*attr_deaths_anlys_od_all$population[time_point_index])
#find the lower and upper bounds of the estimated number of deaths attributable to the intervention
num_attr_od_LB[index]<-sum(attr_deaths_anlys_od_all$imputed_deaths[time_point_index]
- prob_od_no_int_LB_od_all[time_point_index]*attr_deaths_anlys_od_all$population[time_point_index])
num_attr_od_UB[index]<-sum(attr_deaths_anlys_od_all$imputed_deaths[time_point_index]
- prob_od_no_int_UB_od_all[time_point_index]*attr_deaths_anlys_od_all$population[time_point_index])
index<-index + 1
}
num_attr_od_od_all<-data.frame("Time_Period_ID" = sort(unique(attr_deaths_anlys_od_all$Time_Period_ID)),
"Time_Start" = sort(unique(attr_deaths_anlys_od_all$Time_Period_Start)),
"Num_Attr_Deaths" = num_attr_od,
"Num_Attr_Deaths_LB" = num_attr_od_LB,
"Num_Attr_Deaths_UB" = num_attr_od_UB)
#sum up the total number of excess deaths attributable to the intervention
sum(num_attr_od_od_all$Num_Attr_Deaths) #16336.4
## [1] 13948.84
summary(num_attr_od_od_all$Num_Attr_Deaths)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.022 175.954 309.809 348.721 476.751 742.891
num_attr_od_od_all$Time_Start<-as.Date(num_attr_od_od_all$Time_Start)
#compute the 95% CI for the total
sum(num_attr_od_od_all$Num_Attr_Deaths_LB)
## [1] 7631.079
sum(num_attr_od_od_all$Num_Attr_Deaths_UB)
## [1] 20210.63
#sum up the number of excess deaths per year
yearly_num_Attr_Deaths_od_all<-num_attr_od_od_all %>%
group_by("year" = year(Time_Start)) %>%
summarise("deaths" = sum(Num_Attr_Deaths), death_lb = sum(Num_Attr_Deaths_LB),
death_ub = sum(Num_Attr_Deaths_UB))
summary(yearly_num_Attr_Deaths_od_all$deaths)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 14.87 373.89 620.48 697.44 952.41 1428.40
summary(yearly_num_Attr_Deaths_od_all$death_lb)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.131 204.535 339.450 381.554 521.050 781.458
summary(yearly_num_Attr_Deaths_od_all$death_ub)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 21.57 541.77 899.02 1010.53 1379.94 2069.59
Sensitivity Analysis 1: Confounding by Indication
Analysis
############################## Sensitivity Analysis 1: Confounding by Indication Analysis ###############################
#create a variable that is equal to 1 just before intervention
#initialize the column to all zeros
main_analysis_data$justBeforeIntervention<-0
#for each state, we first subset the data so it only contains the state's data
for(state in unique(main_analysis_data$State)){
state_data<-main_analysis_data[main_analysis_data$State == state,]
#then, we find the first time point where intervention occurred
index_first_intervention<-which(state_data$Intervention_Redefined>0)[1]
#impute a 1 for the time point right before when intervention first occurs
#(if the intervention starts at the state's very first period, the index is 0 and this is a silent no-op)
#NOTE(review): if a state has NO intervention rows, which(...)[1] is NA and an NA subscript
#assignment would error here -- later code treats Hawaii as having no intervention; confirm this loop runs clean
main_analysis_data$justBeforeIntervention[main_analysis_data$State == state][index_first_intervention-1]<-1
}
#subset the data so that we are only looking at the periods before the intervention occurs for each state
#Georgia and Ohio are dropped entirely because their intervention occurs in 2000 (no pre-intervention rows);
#Hawaii has no intervention, so all of its rows are kept
#build one piece per state and bind once at the end, instead of growing a data
#frame with rbind() inside the loop (which copies the accumulator on every pass)
state_pieces<-lapply(unique(main_analysis_data$State), function(state){
state_data<-main_analysis_data[main_analysis_data$State == state,]
if(!(state %in% c("Hawaii", "Georgia", "Ohio"))){
#index of the period flagged as just before the first intervention
just_before_index<-which(state_data$justBeforeIntervention>0)
#keep everything up to and including the just-before period
state_data[1:(just_before_index),]
}else if(state == "Hawaii"){
#Hawaii doesn't have an intervention, so we include all the rows of Hawaii
state_data
}else{
#Georgia and Ohio contribute no rows
NULL
}
})
#bind all the per-state pieces into one data frame (NULL entries are ignored by rbind)
sensitivity_anlys_conf_by_indication_data<-do.call(rbind, state_pieces)
#run the analysis for the sensitivity analysis
#binomial GAM: outcome is cbind(deaths, survivors); state fixed effects,
#region-specific cubic-regression-spline time trends, the policy covariates,
#and the just-before-intervention indicator in place of the intervention term
sensitivity_anlys_conf_by_indication<-gam(cbind(round(imputed_deaths), round(num_alive))~ State +
s(Time_Period_ID, bs = "cr", by = as.factor(Region)) +
Naloxone_Pharmacy_Yes_Redefined +
Naloxone_Pharmacy_No_Redefined +
Medical_Marijuana_Redefined +
Recreational_Marijuana_Redefined +
GSL_Redefined +
PDMP_Redefined +
Medicaid_Expansion_Redefined +
justBeforeIntervention +
num_states_w_intervention,
data = sensitivity_anlys_conf_by_indication_data, family = "binomial")
#render the regression table as HTML
stargazer(sensitivity_anlys_conf_by_indication, type = "html", dep.var.labels = "Unintentional Overdose Deaths")
|
|
|
|
Dependent variable:
|
|
|
|
|
|
Unintentional Overdose Deaths
|
|
|
|
StateAlaska
|
0.595***
|
|
|
(0.045)
|
|
|
|
|
StateArizona
|
0.601***
|
|
|
(0.028)
|
|
|
|
|
StateArkansas
|
-0.324***
|
|
|
(0.037)
|
|
|
|
|
StateCalifornia
|
0.058
|
|
|
(0.047)
|
|
|
|
|
StateColorado
|
0.395***
|
|
|
(0.036)
|
|
|
|
|
StateConnecticut
|
0.855***
|
|
|
(0.052)
|
|
|
|
|
StateDelaware
|
0.288***
|
|
|
(0.039)
|
|
|
|
|
StateFlorida
|
0.702***
|
|
|
(0.048)
|
|
|
|
|
StateHawaii
|
-0.070
|
|
|
(0.044)
|
|
|
|
|
StateIdaho
|
0.055
|
|
|
(0.038)
|
|
|
|
|
StateIllinois
|
0.834***
|
|
|
(0.040)
|
|
|
|
|
StateIndiana
|
0.046
|
|
|
(0.033)
|
|
|
|
|
StateIowa
|
-0.674***
|
|
|
(0.072)
|
|
|
|
|
StateKansas
|
0.044
|
|
|
(0.043)
|
|
|
|
|
StateKentucky
|
0.772***
|
|
|
(0.028)
|
|
|
|
|
StateLouisiana
|
0.512***
|
|
|
(0.041)
|
|
|
|
|
StateMaine
|
0.498***
|
|
|
(0.059)
|
|
|
|
|
StateMaryland
|
-1.267***
|
|
|
(0.099)
|
|
|
|
|
StateMassachusetts
|
0.174***
|
|
|
(0.049)
|
|
|
|
|
StateMichigan
|
0.122***
|
|
|
(0.037)
|
|
|
|
|
StateMinnesota
|
-0.425***
|
|
|
(0.042)
|
|
|
|
|
StateMississippi
|
0.122***
|
|
|
(0.031)
|
|
|
|
|
StateMissouri
|
0.404***
|
|
|
(0.037)
|
|
|
|
|
StateMontana
|
-0.138
|
|
|
(0.091)
|
|
|
|
|
StateNebraska
|
-0.805***
|
|
|
(0.048)
|
|
|
|
|
StateNevada
|
0.873***
|
|
|
(0.043)
|
|
|
|
|
StateNew Hampshire
|
0.313***
|
|
|
(0.055)
|
|
|
|
|
StateNew Jersey
|
0.883***
|
|
|
(0.054)
|
|
|
|
|
StateNew Mexico
|
1.445***
|
|
|
(0.040)
|
|
|
|
|
StateNew York
|
0.271***
|
|
|
(0.047)
|
|
|
|
|
StateNorth Carolina
|
0.450***
|
|
|
(0.032)
|
|
|
|
|
StateNorth Dakota
|
-1.145***
|
|
|
(0.079)
|
|
|
|
|
StateOklahoma
|
0.667***
|
|
|
(0.030)
|
|
|
|
|
StateOregon
|
0.146***
|
|
|
(0.041)
|
|
|
|
|
StatePennsylvania
|
1.173***
|
|
|
(0.061)
|
|
|
|
|
StateRhode Island
|
0.113**
|
|
|
(0.057)
|
|
|
|
|
StateSouth Carolina
|
0.297***
|
|
|
(0.029)
|
|
|
|
|
StateSouth Dakota
|
-1.012***
|
|
|
(0.065)
|
|
|
|
|
StateTennessee
|
0.570***
|
|
|
(0.029)
|
|
|
|
|
StateTexas
|
0.541***
|
|
|
(0.043)
|
|
|
|
|
StateUtah
|
-0.486***
|
|
|
(0.064)
|
|
|
|
|
StateVermont
|
0.215***
|
|
|
(0.070)
|
|
|
|
|
StateVirginia
|
0.400***
|
|
|
(0.039)
|
|
|
|
|
StateWashington
|
0.544***
|
|
|
(0.038)
|
|
|
|
|
StateWest Virginia
|
0.896***
|
|
|
(0.030)
|
|
|
|
|
StateWisconsin
|
0.195***
|
|
|
(0.045)
|
|
|
|
|
StateWyoming
|
0.108*
|
|
|
(0.065)
|
|
|
|
|
Naloxone_Pharmacy_Yes_Redefined
|
-0.063
|
|
|
(0.081)
|
|
|
|
|
Naloxone_Pharmacy_No_Redefined
|
-0.373***
|
|
|
(0.029)
|
|
|
|
|
Medical_Marijuana_Redefined
|
0.049**
|
|
|
(0.023)
|
|
|
|
|
Recreational_Marijuana_Redefined
|
-0.173
|
|
|
(0.127)
|
|
|
|
|
GSL_Redefined
|
-0.024
|
|
|
(0.036)
|
|
|
|
|
PDMP_Redefined
|
-0.081***
|
|
|
(0.015)
|
|
|
|
|
Medicaid_Expansion_Redefined
|
0.090
|
|
|
(0.065)
|
|
|
|
|
justBeforeIntervention
|
-0.051***
|
|
|
(0.012)
|
|
|
|
|
num_states_w_intervention
|
-0.017***
|
|
|
(0.004)
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.9
|
|
|
|
|
|
|
|
|
Constant
|
-9.941***
|
|
|
(0.068)
|
|
|
|
|
|
|
Observations
|
833
|
|
Adjusted R2
|
0.873
|
|
Log Likelihood
|
-4,637.128
|
|
UBRE
|
4.048
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
Compile Results
############################## Sensitivity Analysis 1: Make Data Frame of Results and 95% CI ###############################
#store the coefficients of all the terms into a table (one row per model coefficient)
sensitivity_anlys_conf_by_indication_full_table<-data.frame(coef(sensitivity_anlys_conf_by_indication))
head(sensitivity_anlys_conf_by_indication_full_table)
## coef.sensitivity_anlys_conf_by_indication.
## (Intercept) -9.94086992
## StateAlaska 0.59454494
## StateArizona 0.60071300
## StateArkansas -0.32446412
## StateCalifornia 0.05809028
## StateColorado 0.39465654
#change the (auto-generated) column name to "Coefficient_Estimate"
colnames(sensitivity_anlys_conf_by_indication_full_table)<-c("Coefficient_Estimate")
#names of the policy covariate terms in the regression output; used below to
#separate these rows from the state effects and smoothed time effects
policy_terms<-c("Naloxone_Pharmacy_Yes",
"Naloxone_Pharmacy_No",
"Medical_Marijuana",
"Recreational_Marijuana",
"GSL",
"PDMP",
"Medicaid_Expansion")
covariates<-c(paste0(policy_terms, "_Redefined"),
"justBeforeIntervention",
"num_states_w_intervention")
#rename the variable names of the regression output so that they look nicer:
#currently there are 3 types of coefficients: state effects, the covariates, and smoothed time effects
#vectorized rename (replaces the original 1:length() index loop, which reassigned
#rownames() once per row); all masks are computed on the ORIGINAL names so the
#three checks stay mutually exclusive, mirroring the original if/else-if chain
row_names<-rownames(sensitivity_anlys_conf_by_indication_full_table)
not_covariate<-!(row_names %in% covariates)
#state effects look like "StateMassachusetts" or "StateGeorgia": drop the leading "State"
is_state_effect<-not_covariate & substr(row_names, 1, 5) == "State"
#the intercept row corresponds to the baseline state, Alabama
is_intercept<-not_covariate & row_names == "(Intercept)"
#smoothed time effects look like "s(Time_Period_ID):as.factor(Region)West.1":
#replace the machine-generated prefix with a readable label
smooth_prefix<-"s(Time_Period_ID):as.factor(Region)"
is_smooth<-not_covariate & substr(row_names, 1, nchar(smooth_prefix)) == smooth_prefix
row_names[is_state_effect]<-substring(row_names[is_state_effect], 6)
row_names[is_intercept]<-"Intercept/Alabama"
row_names[is_smooth]<-paste0("Smoothed Time for Region ",
substring(row_names[is_smooth], nchar(smooth_prefix) + 1))
rownames(sensitivity_anlys_conf_by_indication_full_table)<-row_names
#store the 95% CI (Wald intervals: estimate +/- 1.96 * SE; summary()$se is aligned with coef() order)
sensitivity_anlys_conf_by_indication_full_table$Coefficient_Lower_Bound<-sensitivity_anlys_conf_by_indication_full_table$Coefficient_Estimate - 1.96*summary(sensitivity_anlys_conf_by_indication)$se
sensitivity_anlys_conf_by_indication_full_table$Coefficient_Upper_Bound<-sensitivity_anlys_conf_by_indication_full_table$Coefficient_Estimate + 1.96*summary(sensitivity_anlys_conf_by_indication)$se
#store the odds ratio estimates and 95% CI (exponentiate the logit-scale estimate and bounds)
sensitivity_anlys_conf_by_indication_full_table$Odds_Ratio<-exp(sensitivity_anlys_conf_by_indication_full_table$Coefficient_Estimate)
sensitivity_anlys_conf_by_indication_full_table$Odds_Ratio_LB<-exp(sensitivity_anlys_conf_by_indication_full_table$Coefficient_Lower_Bound)
sensitivity_anlys_conf_by_indication_full_table$Odds_Ratio_UB<-exp(sensitivity_anlys_conf_by_indication_full_table$Coefficient_Upper_Bound)
#store the standard error and p-values
sensitivity_anlys_conf_by_indication_full_table$Standard_Error<-summary(sensitivity_anlys_conf_by_indication)$se
#since there are no p-values for the smoothed time effects, it imputes NA for those rows
#(summary()$p.pv covers only the parametric terms, which come first in the coefficient vector)
sensitivity_anlys_conf_by_indication_full_table$p_value<-c(summary(sensitivity_anlys_conf_by_indication)$p.pv,
rep(NA, length(coef(sensitivity_anlys_conf_by_indication)) - length(summary(sensitivity_anlys_conf_by_indication)$p.pv)))
#inspect the top of the finished table
head(sensitivity_anlys_conf_by_indication_full_table)
## Coefficient_Estimate Coefficient_Lower_Bound
## Intercept/Alabama -9.94086992 -10.07373090
## Alaska 0.59454494 0.50698675
## Arizona 0.60071300 0.54629247
## Arkansas -0.32446412 -0.39653324
## California 0.05809028 -0.03440845
## Colorado 0.39465654 0.32438942
## Coefficient_Upper_Bound Odds_Ratio Odds_Ratio_LB
## Intercept/Alabama -9.8080089 4.816539e-05 4.217298e-05
## Alaska 0.6821031 1.812206e+00 1.660281e+00
## Arizona 0.6551335 1.823418e+00 1.726839e+00
## Arkansas -0.2523950 7.229146e-01 6.726479e-01
## California 0.1505890 1.059811e+00 9.661768e-01
## Colorado 0.4649237 1.483874e+00 1.383186e+00
## Odds_Ratio_UB Standard_Error p_value
## Intercept/Alabama 5.500926e-05 0.06778622 0.000000e+00
## Alaska 1.978033e+00 0.04467255 2.053321e-40
## Arizona 1.925400e+00 0.02776558 8.384238e-104
## Arkansas 7.769378e-01 0.03676996 1.102813e-18
## California 1.162519e+00 0.04719322 2.183592e-01
## Colorado 1.591893e+00 0.03585057 3.482289e-28
#inspect the smoothed-time rows at the bottom of the table (their p-values are NA by construction)
tail(sensitivity_anlys_conf_by_indication_full_table)
## Coefficient_Estimate Coefficient_Lower_Bound
## Smoothed Time for Region West.4 0.5785156 0.4709646
## Smoothed Time for Region West.5 0.7119943 0.5497326
## Smoothed Time for Region West.6 0.8934090 0.6902182
## Smoothed Time for Region West.7 1.1346202 0.8905588
## Smoothed Time for Region West.8 1.4360230 1.1498232
## Smoothed Time for Region West.9 1.6566192 1.3350400
## Coefficient_Upper_Bound Odds_Ratio
## Smoothed Time for Region West.4 0.6860667 1.783389
## Smoothed Time for Region West.5 0.8742559 2.038052
## Smoothed Time for Region West.6 1.0965998 2.443445
## Smoothed Time for Region West.7 1.3786816 3.109992
## Smoothed Time for Region West.8 1.7222227 4.203943
## Smoothed Time for Region West.9 1.9781985 5.241560
## Odds_Ratio_LB Odds_Ratio_UB Standard_Error
## Smoothed Time for Region West.4 1.601538 1.985889 0.05487299
## Smoothed Time for Region West.5 1.732790 2.397091 0.08278653
## Smoothed Time for Region West.6 1.994151 2.993969 0.10366877
## Smoothed Time for Region West.7 2.436491 3.969665 0.12452113
## Smoothed Time for Region West.8 3.157635 5.596955 0.14602028
## Smoothed Time for Region West.9 3.800148 7.229707 0.16407104
## p_value
## Smoothed Time for Region West.4 NA
## Smoothed Time for Region West.5 NA
## Smoothed Time for Region West.6 NA
## Smoothed Time for Region West.7 NA
## Smoothed Time for Region West.8 NA
## Smoothed Time for Region West.9 NA
#export the sensitivity analysis confounding by indication full table of estimates as CSV
# write.csv(round(sensitivity_anlys_conf_by_indication_full_table,3), "./Data/coefficients_JustBeforeInd_9_6_21_full_data.csv")
#export out a table with just the covariates:
#find the rows in the full table which contain estimates for the covariates and extract them
covariate_Index<-which(rownames(sensitivity_anlys_conf_by_indication_full_table) %in% covariates)
#covariate rows are rounded to 5 decimals here
sensitivity_anlys_conf_by_indication_covariate_table<-(round(sensitivity_anlys_conf_by_indication_full_table[covariate_Index,],5))
#rename these variables so they look nicer
rownames(sensitivity_anlys_conf_by_indication_covariate_table)<-c("Naloxone_Pharmacy_Yes",
"Naloxone_Pharmacy_No",
"Medical_Marijuana",
"Recreational_Marijuana",
"GSL",
"PDMP",
"Medicaid_Expansion",
"Just Before Indicator",
"Number of States w DIH Prosecution")
#put the covariate rows on top and the rest of the data at the bottom
#NOTE(review): the appended non-covariate rows are NOT rounded, unlike the covariate rows above;
#this matches the earlier analysis table but verify it is intentional
sensitivity_anlys_conf_by_indication_covariate_table<-rbind(sensitivity_anlys_conf_by_indication_covariate_table,
sensitivity_anlys_conf_by_indication_full_table[-covariate_Index,])
#extract the columns that gives the odds ratio estimates
#(the columns are relabeled "Risk_Ratio" below; presumably the rare-outcome OR ~ RR approximation -- confirm)
sensitivity_anlys_conf_by_indication_covariate_table<-sensitivity_anlys_conf_by_indication_covariate_table[,-which(colnames(sensitivity_anlys_conf_by_indication_covariate_table) %in% c("Coefficient_Estimate", "Coefficient_Lower_Bound", "Coefficient_Upper_Bound", "Standard_Error"))]
colnames(sensitivity_anlys_conf_by_indication_covariate_table)<-c("Risk_Ratio_Estimates", "RR_95_CI_LB", "RR_95_CI_UB", "p-value")
head(sensitivity_anlys_conf_by_indication_covariate_table, 10)
## Risk_Ratio_Estimates RR_95_CI_LB
## Naloxone_Pharmacy_Yes 9.392000e-01 8.011100e-01
## Naloxone_Pharmacy_No 6.885300e-01 6.508800e-01
## Medical_Marijuana 1.049860e+00 1.002820e+00
## Recreational_Marijuana 8.412200e-01 6.564000e-01
## GSL 9.761000e-01 9.104400e-01
## PDMP 9.222700e-01 8.955400e-01
## Medicaid_Expansion 1.093930e+00 9.624700e-01
## Just Before Indicator 9.499200e-01 9.271800e-01
## Number of States w DIH Prosecution 9.829900e-01 9.756200e-01
## Intercept/Alabama 4.816539e-05 4.217298e-05
## RR_95_CI_UB p-value
## Naloxone_Pharmacy_Yes 1.101090e+00 0.43945
## Naloxone_Pharmacy_No 7.283700e-01 0.00000
## Medical_Marijuana 1.099100e+00 0.03749
## Recreational_Marijuana 1.078090e+00 0.17195
## GSL 1.046500e+00 0.49600
## PDMP 9.498000e-01 0.00000
## Medicaid_Expansion 1.243350e+00 0.16932
## Just Before Indicator 9.732000e-01 0.00003
## Number of States w DIH Prosecution 9.904200e-01 0.00001
## Intercept/Alabama 5.500926e-05 0.00000
#export the covariate table into a CSV file
# write.csv(round(sensitivity_anlys_conf_by_indication_covariate_table,3), "./Data/covariates_just_before_intervention_9_6_21_full_data.csv")
Sensitivity Analysis 2: Exclude States with At Least 75% Missing Monthly Data
Analysis
############################## Exclude States from Analysis Based on Missing Data ###############################
#subset the data to be between 2000 and 2019
overdose_monthly_imputed<-read.csv("./Data/od_data_interpolated_unintentional_1999_2019_age_17_up_9_6_21.csv")
od_2000_to_2019<-overdose_monthly_imputed[overdose_monthly_imputed$Year>=2000 & overdose_monthly_imputed$Year<=2019,]
#convert the date to Date objects and the number of deaths to numeric
#(Month.Code is parsed as "YYYY/MM"; as.Date(as.yearmon(...)) yields the first day of each month)
od_2000_to_2019$Date<-as.Date(as.yearmon(od_2000_to_2019$Month.Code, format = "%Y/%m"))
od_2000_to_2019$Deaths<-as.numeric(od_2000_to_2019$Deaths) #non-numeric entries coerce to NA, which the missingness check below relies on
#plot to look at the trend of the outcome and see how much data is missing
ggplot(od_2000_to_2019, aes(x = Date, y = Deaths)) + geom_line() + facet_wrap(vars(State))

#find the states where the proportion of missing monthly death counts is greater than 75%
#(the original comment said "years 2000 to 2017", but od_2000_to_2019 spans 2000-2019)
#vapply() instead of sapply() so the result is guaranteed to be a logical vector
state_levels<-unique(od_2000_to_2019$State)
statesToExcludeCriteria<-vapply(state_levels, function(state){
mean(is.na(od_2000_to_2019$Deaths[od_2000_to_2019$State == state]))>.75}, logical(1))
statesToExclude<-state_levels[statesToExcludeCriteria]
#states excluded: [1] "Alaska" "Montana" "Nebraska" "North Dakota" "South Dakota" "Vermont" "Wyoming"
#calculate the median (and IQR) of missingness from resulting data
#per-state proportion of NA monthly death counts
summary_missingness <- od_2000_to_2019 %>%
group_by(State) %>%
summarise(missing = mean(is.na(Deaths)))
#median, IQR, and full summary of missingness among the states that are KEPT
median(summary_missingness$missing[!summary_missingness$State %in% statesToExclude])
## [1] 0.02083333
IQR(summary_missingness$missing[!summary_missingness$State %in% statesToExclude])
## [1] 0.1979167
summary(summary_missingness$missing[!summary_missingness$State %in% statesToExclude])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.00000 0.02083 0.12791 0.19792 0.64583
#include only the states that are not excluded
sensitivity_anlys_exclude_states_data<-main_analysis_data[!(main_analysis_data$State %in% statesToExclude), ]
#refit the main binomial GAM on the reduced data: state fixed effects,
#region-specific cubic spline time trends, the policy covariates, and the intervention terms
sensitivity_anlys_exclude_states_model<-gam(cbind(round(imputed_deaths), round(num_alive))~ State +
s(Time_Period_ID, bs = "cr", by = as.factor(Region)) +
Naloxone_Pharmacy_Yes_Redefined +
Naloxone_Pharmacy_No_Redefined +
Medical_Marijuana_Redefined +
Recreational_Marijuana_Redefined +
GSL_Redefined +
PDMP_Redefined +
Medicaid_Expansion_Redefined +
Intervention_Redefined +
num_states_w_intervention,
data = sensitivity_anlys_exclude_states_data, family = "binomial")
#render the regression table as HTML
stargazer(sensitivity_anlys_exclude_states_model, type = "html", dep.var.labels = "Unintentional Overdose Death")
|
|
|
|
Dependent variable:
|
|
|
|
|
|
Unintentional Overdose Death
|
|
|
|
StateArizona
|
0.317***
|
|
|
(0.014)
|
|
|
|
|
StateArkansas
|
-0.388***
|
|
|
(0.020)
|
|
|
|
|
StateCalifornia
|
-0.161***
|
|
|
(0.013)
|
|
|
|
|
StateColorado
|
0.099***
|
|
|
(0.016)
|
|
|
|
|
StateConnecticut
|
0.185***
|
|
|
(0.016)
|
|
|
|
|
StateDelaware
|
0.433***
|
|
|
(0.022)
|
|
|
|
|
StateFlorida
|
0.252***
|
|
|
(0.012)
|
|
|
|
|
StateGeorgia
|
-0.071***
|
|
|
(0.013)
|
|
|
|
|
StateHawaii
|
-0.222***
|
|
|
(0.026)
|
|
|
|
|
StateIdaho
|
-0.135***
|
|
|
(0.024)
|
|
|
|
|
StateIllinois
|
-0.017
|
|
|
(0.013)
|
|
|
|
|
StateIndiana
|
0.087***
|
|
|
(0.014)
|
|
|
|
|
StateIowa
|
-0.741***
|
|
|
(0.021)
|
|
|
|
|
StateKansas
|
-0.328***
|
|
|
(0.019)
|
|
|
|
|
StateKentucky
|
0.641***
|
|
|
(0.014)
|
|
|
|
|
StateLouisiana
|
0.294***
|
|
|
(0.014)
|
|
|
|
|
StateMaine
|
0.146***
|
|
|
(0.022)
|
|
|
|
|
StateMaryland
|
-1.066***
|
|
|
(0.019)
|
|
|
|
|
StateMassachusetts
|
0.207***
|
|
|
(0.014)
|
|
|
|
|
StateMichigan
|
-0.020
|
|
|
(0.014)
|
|
|
|
|
StateMinnesota
|
-0.617***
|
|
|
(0.017)
|
|
|
|
|
StateMississippi
|
-0.098***
|
|
|
(0.018)
|
|
|
|
|
StateMissouri
|
0.196***
|
|
|
(0.015)
|
|
|
|
|
StateNevada
|
0.440***
|
|
|
(0.017)
|
|
|
|
|
StateNew Hampshire
|
0.258***
|
|
|
(0.020)
|
|
|
|
|
StateNew Jersey
|
0.106***
|
|
|
(0.013)
|
|
|
|
|
StateNew Mexico
|
0.628***
|
|
|
(0.017)
|
|
|
|
|
StateNew York
|
-0.242***
|
|
|
(0.013)
|
|
|
|
|
StateNorth Carolina
|
0.176***
|
|
|
(0.013)
|
|
|
|
|
StateOhio
|
0.453***
|
|
|
(0.012)
|
|
|
|
|
StateOklahoma
|
0.385***
|
|
|
(0.015)
|
|
|
|
|
StateOregon
|
-0.191***
|
|
|
(0.018)
|
|
|
|
|
StatePennsylvania
|
0.434***
|
|
|
(0.012)
|
|
|
|
|
StateRhode Island
|
0.238***
|
|
|
(0.022)
|
|
|
|
|
StateSouth Carolina
|
0.223***
|
|
|
(0.015)
|
|
|
|
|
StateTennessee
|
0.436***
|
|
|
(0.013)
|
|
|
|
|
StateTexas
|
-0.205***
|
|
|
(0.012)
|
|
|
|
|
StateUtah
|
0.071***
|
|
|
(0.018)
|
|
|
|
|
StateVirginia
|
-0.114***
|
|
|
(0.014)
|
|
|
|
|
StateWashington
|
0.079***
|
|
|
(0.015)
|
|
|
|
|
StateWest Virginia
|
0.877***
|
|
|
(0.015)
|
|
|
|
|
StateWisconsin
|
-0.038***
|
|
|
(0.015)
|
|
|
|
|
Naloxone_Pharmacy_Yes_Redefined
|
-0.021***
|
|
|
(0.008)
|
|
|
|
|
Naloxone_Pharmacy_No_Redefined
|
0.016**
|
|
|
(0.007)
|
|
|
|
|
Medical_Marijuana_Redefined
|
0.061***
|
|
|
(0.006)
|
|
|
|
|
Recreational_Marijuana_Redefined
|
-0.040***
|
|
|
(0.009)
|
|
|
|
|
GSL_Redefined
|
0.034***
|
|
|
(0.006)
|
|
|
|
|
PDMP_Redefined
|
-0.016***
|
|
|
(0.006)
|
|
|
|
|
Medicaid_Expansion_Redefined
|
0.099***
|
|
|
(0.006)
|
|
|
|
|
Intervention_Redefined
|
0.066***
|
|
|
(0.005)
|
|
|
|
|
num_states_w_intervention
|
0.004**
|
|
|
(0.002)
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.9
|
|
|
|
|
|
|
|
|
Constant
|
-9.919***
|
|
|
(0.054)
|
|
|
|
|
|
|
Observations
|
1,720
|
|
Adjusted R2
|
0.910
|
|
Log Likelihood
|
-15,830.150
|
|
UBRE
|
10.177
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
Compile Results
############################## Make Data Frame of Results and 95% CI ###############################
#store the coefficients into the table (one row per model coefficient)
sensitivity_anlys_exclude_states_full_table<-data.frame(coef(sensitivity_anlys_exclude_states_model))
#check to see how the table looks
head(sensitivity_anlys_exclude_states_full_table)
## coef.sensitivity_anlys_exclude_states_model.
## (Intercept) -9.91864326
## StateArizona 0.31672571
## StateArkansas -0.38808496
## StateCalifornia -0.16089236
## StateColorado 0.09931829
## StateConnecticut 0.18512065
#rename the (auto-generated) column to "Coefficient_Estimate"
colnames(sensitivity_anlys_exclude_states_full_table)<-c("Coefficient_Estimate")
#names of the policy covariate terms in the regression output; used below to
#separate these rows from the state effects and smoothed time effects
policy_terms<-c("Naloxone_Pharmacy_Yes",
"Naloxone_Pharmacy_No",
"Medical_Marijuana",
"Recreational_Marijuana",
"GSL",
"PDMP",
"Medicaid_Expansion",
"Intervention")
covariates<-c(paste0(policy_terms, "_Redefined"),
"num_states_w_intervention")
#rename the variable names of the regression output so that they look nicer:
#currently there are 3 types of coefficients: state effects, the covariates, and smoothed time effects
#vectorized rename (replaces the original 1:length() index loop, which reassigned
#rownames() once per row); all masks are computed on the ORIGINAL names so the
#three checks stay mutually exclusive, mirroring the original if/else-if chain
row_names<-rownames(sensitivity_anlys_exclude_states_full_table)
not_covariate<-!(row_names %in% covariates)
#state effects look like "StateMassachusetts" or "StateGeorgia": drop the leading "State"
is_state_effect<-not_covariate & substr(row_names, 1, 5) == "State"
#the intercept row corresponds to the baseline state, Alabama
is_intercept<-not_covariate & row_names == "(Intercept)"
#smoothed time effects look like "s(Time_Period_ID):as.factor(Region)West.1":
#replace the machine-generated prefix with a readable label
smooth_prefix<-"s(Time_Period_ID):as.factor(Region)"
is_smooth<-not_covariate & substr(row_names, 1, nchar(smooth_prefix)) == smooth_prefix
row_names[is_state_effect]<-substring(row_names[is_state_effect], 6)
row_names[is_intercept]<-"Intercept/Alabama"
row_names[is_smooth]<-paste0("Smoothed Time for Region ",
substring(row_names[is_smooth], nchar(smooth_prefix) + 1))
rownames(sensitivity_anlys_exclude_states_full_table)<-row_names
#confidence intervals for the coefficients.
#hoist the model summary into a local: summary() on a gam fit is not free, and the
#original recomputed it four separate times for the same model
exclude_states_summary <- summary(sensitivity_anlys_exclude_states_model)
#normal-approximation 95% CI on the coefficient (log-odds) scale
sensitivity_anlys_exclude_states_full_table$Coefficient_Lower_Bound <-
  sensitivity_anlys_exclude_states_full_table$Coefficient_Estimate -
  1.96 * exclude_states_summary$se
sensitivity_anlys_exclude_states_full_table$Coefficient_Upper_Bound <-
  sensitivity_anlys_exclude_states_full_table$Coefficient_Estimate +
  1.96 * exclude_states_summary$se
#exponentiate the estimates and confidence bounds to move to the odds-ratio scale
sensitivity_anlys_exclude_states_full_table$Odds_Ratio <-
  exp(sensitivity_anlys_exclude_states_full_table$Coefficient_Estimate)
sensitivity_anlys_exclude_states_full_table$Odds_Ratio_LB <-
  exp(sensitivity_anlys_exclude_states_full_table$Coefficient_Lower_Bound)
sensitivity_anlys_exclude_states_full_table$Odds_Ratio_UB <-
  exp(sensitivity_anlys_exclude_states_full_table$Coefficient_Upper_Bound)
#store the standard error and p-value
sensitivity_anlys_exclude_states_full_table$Standard_Error <- exclude_states_summary$se
#there is no p-value for the smoothed time effects, so pad those rows with NA
#(assumes the parametric terms come first in coef(), which is how mgcv orders them)
sensitivity_anlys_exclude_states_full_table$p_value <-
  c(exclude_states_summary$p.pv,
    rep(NA, length(coef(sensitivity_anlys_exclude_states_model)) -
          length(exclude_states_summary$p.pv)))
head(sensitivity_anlys_exclude_states_full_table)
## Coefficient_Estimate Coefficient_Lower_Bound
## Intercept/Alabama -9.91864326 -10.02510428
## Arizona 0.31672571 0.28937143
## Arkansas -0.38808496 -0.42662480
## California -0.16089236 -0.18695392
## Colorado 0.09931829 0.06782206
## Connecticut 0.18512065 0.15458896
## Coefficient_Upper_Bound Odds_Ratio Odds_Ratio_LB
## Intercept/Alabama -9.8121822 4.924793e-05 4.427438e-05
## Arizona 0.3440800 1.372626e+00 1.335588e+00
## Arkansas -0.3495451 6.783547e-01 6.527084e-01
## California -0.1348308 8.513837e-01 8.294820e-01
## Colorado 0.1308145 1.104418e+00 1.070175e+00
## Connecticut 0.2156523 1.203364e+00 1.167178e+00
## Odds_Ratio_UB Standard_Error p_value
## Intercept/Alabama 5.478017e-05 0.05431685 0.000000e+00
## Arizona 1.410691e+00 0.01395626 5.115891e-114
## Arkansas 7.050087e-01 0.01966319 1.045273e-86
## California 8.738638e-01 0.01329671 1.054074e-33
## Colorado 1.139756e+00 0.01606950 6.388067e-10
## Connecticut 1.240671e+00 0.01557739 1.434599e-32
tail(sensitivity_anlys_exclude_states_full_table)
## Coefficient_Estimate Coefficient_Lower_Bound
## Smoothed Time for Region West.4 0.2242012 0.19471674
## Smoothed Time for Region West.5 0.1737596 0.11973538
## Smoothed Time for Region West.6 0.1469437 0.07186594
## Smoothed Time for Region West.7 0.1925901 0.10452667
## Smoothed Time for Region West.8 0.3045692 0.19727128
## Smoothed Time for Region West.9 0.4229533 0.33082820
## Coefficient_Upper_Bound Odds_Ratio
## Smoothed Time for Region West.4 0.2536857 1.251323
## Smoothed Time for Region West.5 0.2277839 1.189770
## Smoothed Time for Region West.6 0.2220214 1.158289
## Smoothed Time for Region West.7 0.2806536 1.212386
## Smoothed Time for Region West.8 0.4118671 1.356041
## Smoothed Time for Region West.9 0.5150784 1.526463
## Odds_Ratio_LB Odds_Ratio_UB Standard_Error
## Smoothed Time for Region West.4 1.214967 1.288767 0.01504311
## Smoothed Time for Region West.5 1.127199 1.255814 0.02756339
## Smoothed Time for Region West.6 1.074511 1.248598 0.03830496
## Smoothed Time for Region West.7 1.110185 1.323995 0.04493034
## Smoothed Time for Region West.8 1.218074 1.509634 0.05474383
## Smoothed Time for Region West.9 1.392121 1.673770 0.04700260
## p_value
## Smoothed Time for Region West.4 NA
## Smoothed Time for Region West.5 NA
## Smoothed Time for Region West.6 NA
## Smoothed Time for Region West.7 NA
## Smoothed Time for Region West.8 NA
## Smoothed Time for Region West.9 NA
#build a presentation table restricted to the policy covariates
#locate the covariate rows within the full coefficient table
covariate_Index <- which(rownames(sensitivity_anlys_exclude_states_full_table) %in% covariates)
#round the covariate rows for display
sensitivity_anlys_exclude_states_covariate_table <-
  round(sensitivity_anlys_exclude_states_full_table[covariate_Index, ], 5)
#cleaner display names (assigned positionally, following the covariates' row order)
rownames(sensitivity_anlys_exclude_states_covariate_table) <- c(
  "Naloxone_Pharmacy_Yes",
  "Naloxone_Pharmacy_No",
  "Medical_Marijuana",
  "Recreational_Marijuana",
  "GSL",
  "PDMP",
  "Medicaid_Expansion",
  "Intervention_Redefined",
  "Number of States w DIH Prosecution")
#stack the covariates on top of the remaining (state and smooth-term) rows
sensitivity_anlys_exclude_states_covariate_table <- rbind(
  sensitivity_anlys_exclude_states_covariate_table,
  sensitivity_anlys_exclude_states_full_table[-covariate_Index, ])
#keep only the odds-ratio-scale columns and the p-value
log_scale_cols <- c("Coefficient_Estimate", "Coefficient_Lower_Bound",
                    "Coefficient_Upper_Bound", "Standard_Error")
sensitivity_anlys_exclude_states_covariate_table <-
  sensitivity_anlys_exclude_states_covariate_table[
    , !(colnames(sensitivity_anlys_exclude_states_covariate_table) %in% log_scale_cols)]
colnames(sensitivity_anlys_exclude_states_covariate_table) <-
  c("Risk_Ratio_Estimates", "RR_95_CI_LB", "RR_95_CI_UB", "p-value")
head(sensitivity_anlys_exclude_states_covariate_table, 10)
## Risk_Ratio_Estimates RR_95_CI_LB
## Naloxone_Pharmacy_Yes 9.791200e-01 9.642100e-01
## Naloxone_Pharmacy_No 1.016630e+00 1.003620e+00
## Medical_Marijuana 1.063350e+00 1.051550e+00
## Recreational_Marijuana 9.608100e-01 9.444600e-01
## GSL 1.034130e+00 1.022700e+00
## PDMP 9.839900e-01 9.727000e-01
## Medicaid_Expansion 1.103900e+00 1.090720e+00
## Intervention_Redefined 1.068040e+00 1.056700e+00
## Number of States w DIH Prosecution 1.003780e+00 1.000140e+00
## Intercept/Alabama 4.924793e-05 4.427438e-05
## RR_95_CI_UB p-value
## Naloxone_Pharmacy_Yes 9.942700e-01 0.00708
## Naloxone_Pharmacy_No 1.029800e+00 0.01205
## Medical_Marijuana 1.075280e+00 0.00000
## Recreational_Marijuana 9.774400e-01 0.00000
## GSL 1.045690e+00 0.00000
## PDMP 9.954000e-01 0.00609
## Medicaid_Expansion 1.117240e+00 0.00000
## Intervention_Redefined 1.079490e+00 0.00000
## Number of States w DIH Prosecution 1.007430e+00 0.04194
## Intercept/Alabama 5.478017e-05 0.00000
#save the table into a CSV
# write.csv(round(sensitivity_anlys_exclude_states_covariate_table, 3), "./Data/coefficients_covariates_9_6_21_full_data_exclude_states.csv")
Attributable Deaths
###################################### Attributable Deaths #############################
#subset to the rows (state x time) where at least one state had the intervention
attr_deaths_anlys_exclude_states <- sensitivity_anlys_exclude_states_data[
  which(sensitivity_anlys_exclude_states_data$Intervention_Redefined > 0), ]
#probability of overdose had the intervention not occurred: remove the intervention
#effect on the logit scale, then map back with expit
prob_od_no_int_exclude_states <- expit(
  -coef(sensitivity_anlys_exclude_states_model)["Intervention_Redefined"] *
    attr_deaths_anlys_exclude_states$Intervention_Redefined +
    logit(attr_deaths_anlys_exclude_states$imputed_deaths /
            attr_deaths_anlys_exclude_states$population))
#95% CI bounds for all coefficients, from the model's standard errors
coef_lb <- coef(sensitivity_anlys_exclude_states_model) -
  1.96 * summary(sensitivity_anlys_exclude_states_model)$se
coef_ub <- coef(sensitivity_anlys_exclude_states_model) +
  1.96 * summary(sensitivity_anlys_exclude_states_model)$se
#lower/upper bounds of the no-intervention overdose probability, using the bounds of
#the intervention coefficient
prob_od_no_int_LB_exclude_states <- expit(
  -coef_lb[names(coef_lb) == "Intervention_Redefined"] *
    attr_deaths_anlys_exclude_states$Intervention_Redefined +
    logit(attr_deaths_anlys_exclude_states$imputed_deaths /
            attr_deaths_anlys_exclude_states$population))
prob_od_no_int_UB_exclude_states <- expit(
  -coef_ub[names(coef_ub) == "Intervention_Redefined"] *
    attr_deaths_anlys_exclude_states$Intervention_Redefined +
    logit(attr_deaths_anlys_exclude_states$imputed_deaths /
            attr_deaths_anlys_exclude_states$population))
#estimate the number of deaths attributable to the intervention, per time period
#FIX: size the result vectors by the time periods of the SUBSET iterated below.
#the original sized them by the full data's time periods, leaving trailing NAs
#(and breaking the data.frame() construction) whenever some periods have no
#state with the intervention
attr_time_periods <- sort(unique(attr_deaths_anlys_exclude_states$Time_Period_ID))
num_attr_od_UB <- num_attr_od_LB <- num_attr_od <- rep(NA, length(attr_time_periods))
index <- 1 #keep track of where to store the values in the vectors
for (time in attr_time_periods) {
  #rows belonging to this time period
  time_point_index <- which(attr_deaths_anlys_exclude_states$Time_Period_ID == time)
  #attributable deaths = observed deaths - estimated deaths had intervention not occurred
  num_attr_od[index] <- sum(attr_deaths_anlys_exclude_states$imputed_deaths[time_point_index]
                            - prob_od_no_int_exclude_states[time_point_index] *
                              attr_deaths_anlys_exclude_states$population[time_point_index])
  #lower and upper bounds of the attributable-death estimate
  num_attr_od_LB[index] <- sum(attr_deaths_anlys_exclude_states$imputed_deaths[time_point_index]
                               - prob_od_no_int_LB_exclude_states[time_point_index] *
                                 attr_deaths_anlys_exclude_states$population[time_point_index])
  num_attr_od_UB[index] <- sum(attr_deaths_anlys_exclude_states$imputed_deaths[time_point_index]
                               - prob_od_no_int_UB_exclude_states[time_point_index] *
                                 attr_deaths_anlys_exclude_states$population[time_point_index])
  index <- index + 1
}
#NOTE(review): Time_Start assumes Time_Period_Start is one-to-one with Time_Period_ID,
#so that sorting both independently keeps them aligned — confirm upstream
num_attr_od_exclude_states <- data.frame(
  "Time_Period_ID" = attr_time_periods,
  "Time_Start" = sort(unique(attr_deaths_anlys_exclude_states$Time_Period_Start)),
  "Num_Attr_Deaths" = num_attr_od,
  "Num_Attr_Deaths_LB" = num_attr_od_LB,
  "Num_Attr_Deaths_UB" = num_attr_od_UB)
## [1] 34907.92
#aggregate the attributable-death estimates (and their bounds) to calendar years
yearly_num_Attr_Deaths_exclude_states <- num_attr_od_exclude_states %>%
  mutate(year = year(Time_Start)) %>%
  group_by(year) %>%
  summarise(deaths = sum(Num_Attr_Deaths),
            death_lb = sum(Num_Attr_Deaths_LB),
            death_ub = sum(Num_Attr_Deaths_UB))
#distribution of the yearly attributable-death estimates
summary(yearly_num_Attr_Deaths_exclude_states$deaths)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 31.69 797.13 1488.48 1745.40 2428.27 3873.37
Sensitivity Analysis 2b: Excluding Different States
Analysis
############################## Exclude Different States from Analysis ###############################
# main_analysis_data %>%
# group_by(State) %>%
# summarise(sum_int = sum(Intervention_Redefined), sum_deaths = sum(imputed_deaths)) %>%
# arrange(desc(sum_deaths))
# list_of_states <- c("Alabama",
# "Alaska",
# "Arizona",
# "Arkansas",
# "California",
# "Colorado",
# "Connecticut",
# "Delaware",
# "Florida",
# "Georgia",
# "Hawaii",
# "Idaho",
# "Illinois",
# "Indiana",
# "Iowa",
# "Kansas",
# "Kentucky",
# "Louisiana",
# "Maine",
# "Maryland",
# "Massachusetts",
# "Michigan",
# "Minnesota",
# "Mississippi",
# "Missouri",
# "Montana",
# "Nebraska",
# "Nevada",
# "New Hampshire",
# "New Jersey",
# "New Mexico",
# "New York",
# "North Carolina",
# "North Dakota",
# "Ohio",
# "Oklahoma",
# "Oregon",
# "Pennsylvania",
# "Rhode Island",
# "South Carolina",
# "South Dakota",
# "Tennessee",
# "Texas",
# "Utah",
# "Vermont",
# "Virginia",
# "Washington",
# "West Virginia",
# "Wisconsin",
# "Wyoming" )
#use R's built-in vector of state names; the empty string stands for "exclude no state"
list_of_states <- c("", state.name)
#preallocate the results table: one row per candidate exclusion, filled in by the loop below
sensitivity_anlys_exclude_diff_states_tab_results <- data.frame(
  state = list_of_states,
  coefficient_intervention = rep(NA, length(list_of_states)),
  std_error = rep(NA, length(list_of_states)),
  p_val = rep(NA, length(list_of_states)))
#fit the model once per candidate exclusion; the "" entry excludes nothing and so
#gives the all-states reference fit.
#loop variable renamed from `state` to `excluded_state` so it cannot be confused
#with the State column inside dplyr's data mask in filter()
for (excluded_state in list_of_states) {
  #include only the states that are not excluded
  sensitivity_anlys_exclude_diff_states_data <- main_analysis_data %>%
    ungroup() %>%
    filter(State != excluded_state)
  #binomial GAM: state fixed effects, region-specific smoothed time trends,
  #policy covariates, and the DIH-prosecution intervention terms
  sensitivity_anlys_exclude_diff_states_model <- gam(
    cbind(round(imputed_deaths), round(num_alive)) ~ State +
      s(Time_Period_ID, bs = "cr", by = as.factor(Region)) +
      Naloxone_Pharmacy_Yes_Redefined +
      Naloxone_Pharmacy_No_Redefined +
      Medical_Marijuana_Redefined +
      Recreational_Marijuana_Redefined +
      GSL_Redefined +
      PDMP_Redefined +
      Medicaid_Expansion_Redefined +
      Intervention_Redefined +
      num_states_w_intervention,
    data = sensitivity_anlys_exclude_diff_states_data, family = "binomial")
  #obtain the summary of the model once per fit
  summary_sensitivity_analys_exclude_diff_states <- summary(sensitivity_anlys_exclude_diff_states_model)
  #row of the results table corresponding to this exclusion
  row_match <- sensitivity_anlys_exclude_diff_states_tab_results$state == excluded_state
  #obtain coefficient of intervention
  sensitivity_anlys_exclude_diff_states_tab_results$coefficient_intervention[row_match] <-
    summary_sensitivity_analys_exclude_diff_states$p.coeff["Intervention_Redefined"]
  #obtain standard error of intervention
  sensitivity_anlys_exclude_diff_states_tab_results$std_error[row_match] <-
    summary_sensitivity_analys_exclude_diff_states$se["Intervention_Redefined"]
  #obtain p-value of intervention (fixed: this comment previously duplicated the
  #standard-error comment above)
  sensitivity_anlys_exclude_diff_states_tab_results$p_val[row_match] <-
    summary_sensitivity_analys_exclude_diff_states$p.pv["Intervention_Redefined"]
}
#relabel the "" sentinel row (no state excluded) as "all" for readability
sensitivity_anlys_exclude_diff_states_tab_results$state[sensitivity_anlys_exclude_diff_states_tab_results$state == ""] <- "all"
#NOTE(review): this prints the model left over from the LAST loop iteration above
#(Wyoming excluded), not the all-states fit — confirm this is the intended table
stargazer(sensitivity_anlys_exclude_diff_states_model, type = "html", dep.var.labels = "Unintentional Overdose Death")
|
|
|
|
Dependent variable:
|
|
|
|
|
|
Unintentional Overdose Death
|
|
|
|
StateAlaska
|
0.276***
|
|
|
(0.028)
|
|
|
|
|
StateArizona
|
0.314***
|
|
|
(0.014)
|
|
|
|
|
StateArkansas
|
-0.391***
|
|
|
(0.020)
|
|
|
|
|
StateCalifornia
|
-0.160***
|
|
|
(0.013)
|
|
|
|
|
StateColorado
|
0.096***
|
|
|
(0.016)
|
|
|
|
|
StateConnecticut
|
0.187***
|
|
|
(0.016)
|
|
|
|
|
StateDelaware
|
0.428***
|
|
|
(0.022)
|
|
|
|
|
StateFlorida
|
0.253***
|
|
|
(0.012)
|
|
|
|
|
StateGeorgia
|
-0.071***
|
|
|
(0.013)
|
|
|
|
|
StateHawaii
|
-0.227***
|
|
|
(0.026)
|
|
|
|
|
StateIdaho
|
-0.136***
|
|
|
(0.024)
|
|
|
|
|
StateIllinois
|
-0.017
|
|
|
(0.013)
|
|
|
|
|
StateIndiana
|
0.087***
|
|
|
(0.014)
|
|
|
|
|
StateIowa
|
-0.740***
|
|
|
(0.021)
|
|
|
|
|
StateKansas
|
-0.327***
|
|
|
(0.019)
|
|
|
|
|
StateKentucky
|
0.640***
|
|
|
(0.014)
|
|
|
|
|
StateLouisiana
|
0.294***
|
|
|
(0.014)
|
|
|
|
|
StateMaine
|
0.145***
|
|
|
(0.022)
|
|
|
|
|
StateMaryland
|
-1.067***
|
|
|
(0.019)
|
|
|
|
|
StateMassachusetts
|
0.207***
|
|
|
(0.014)
|
|
|
|
|
StateMichigan
|
-0.020
|
|
|
(0.014)
|
|
|
|
|
StateMinnesota
|
-0.619***
|
|
|
(0.017)
|
|
|
|
|
StateMississippi
|
-0.100***
|
|
|
(0.018)
|
|
|
|
|
StateMissouri
|
0.195***
|
|
|
(0.015)
|
|
|
|
|
StateMontana
|
-0.359***
|
|
|
(0.029)
|
|
|
|
|
StateNebraska
|
-0.883***
|
|
|
(0.029)
|
|
|
|
|
StateNevada
|
0.439***
|
|
|
(0.017)
|
|
|
|
|
StateNew Hampshire
|
0.257***
|
|
|
(0.020)
|
|
|
|
|
StateNew Jersey
|
0.106***
|
|
|
(0.013)
|
|
|
|
|
StateNew Mexico
|
0.629***
|
|
|
(0.017)
|
|
|
|
|
StateNew York
|
-0.240***
|
|
|
(0.013)
|
|
|
|
|
StateNorth Carolina
|
0.177***
|
|
|
(0.013)
|
|
|
|
|
StateNorth Dakota
|
-1.054***
|
|
|
(0.045)
|
|
|
|
|
StateOhio
|
0.454***
|
|
|
(0.012)
|
|
|
|
|
StateOklahoma
|
0.385***
|
|
|
(0.015)
|
|
|
|
|
StateOregon
|
-0.195***
|
|
|
(0.018)
|
|
|
|
|
StatePennsylvania
|
0.435***
|
|
|
(0.012)
|
|
|
|
|
StateRhode Island
|
0.237***
|
|
|
(0.022)
|
|
|
|
|
StateSouth Carolina
|
0.221***
|
|
|
(0.015)
|
|
|
|
|
StateSouth Dakota
|
-0.960***
|
|
|
(0.043)
|
|
|
|
|
StateTennessee
|
0.436***
|
|
|
(0.013)
|
|
|
|
|
StateTexas
|
-0.204***
|
|
|
(0.012)
|
|
|
|
|
StateUtah
|
0.073***
|
|
|
(0.018)
|
|
|
|
|
StateVermont
|
-0.168***
|
|
|
(0.031)
|
|
|
|
|
StateVirginia
|
-0.113***
|
|
|
(0.014)
|
|
|
|
|
StateWashington
|
0.077***
|
|
|
(0.015)
|
|
|
|
|
StateWest Virginia
|
0.875***
|
|
|
(0.015)
|
|
|
|
|
StateWisconsin
|
-0.037**
|
|
|
(0.015)
|
|
|
|
|
Naloxone_Pharmacy_Yes_Redefined
|
-0.022***
|
|
|
(0.008)
|
|
|
|
|
Naloxone_Pharmacy_No_Redefined
|
0.010
|
|
|
(0.007)
|
|
|
|
|
Medical_Marijuana_Redefined
|
0.063***
|
|
|
(0.006)
|
|
|
|
|
Recreational_Marijuana_Redefined
|
-0.037***
|
|
|
(0.009)
|
|
|
|
|
GSL_Redefined
|
0.034***
|
|
|
(0.006)
|
|
|
|
|
PDMP_Redefined
|
-0.019***
|
|
|
(0.006)
|
|
|
|
|
Medicaid_Expansion_Redefined
|
0.101***
|
|
|
(0.006)
|
|
|
|
|
Intervention_Redefined
|
0.061***
|
|
|
(0.005)
|
|
|
|
|
num_states_w_intervention
|
0.004**
|
|
|
(0.002)
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.9
|
|
|
|
|
|
|
|
|
Constant
|
-9.912***
|
|
|
(0.054)
|
|
|
|
|
|
|
Observations
|
1,960
|
|
Adjusted R2
|
0.911
|
|
Log Likelihood
|
-16,603.040
|
|
UBRE
|
8.990
|
|
|
|
Note:
|
*p<0.1; **p<0.05; ***p<0.01
|
Compile Results
############################## Make Data Frame of Results and 95% CI ###############################
#store the coefficients into the table
#NOTE(review): this uses the model left over from the last loop iteration above
#(Wyoming excluded) — TODO confirm this is the intended fit for the compiled table
sensitivity_anlys_exclude_diff_states_full_table<-data.frame(coef(sensitivity_anlys_exclude_diff_states_model))
#check to see how the table looks
head(sensitivity_anlys_exclude_diff_states_full_table)
## coef.sensitivity_anlys_exclude_diff_states_model.
## (Intercept) -9.91163324
## StateAlaska 0.27649991
## StateArizona 0.31444075
## StateArkansas -0.39099480
## StateCalifornia -0.16008628
## StateColorado 0.09582859
#rename the column to "Coefficient_Estimate"
colnames(sensitivity_anlys_exclude_diff_states_full_table)<-c("Coefficient_Estimate")
#vector of covariates, used below for membership tests when renaming rows
#NOTE(review): the covariate-table step later assigns display names positionally,
#which assumes the model output lists these covariates in this same order — this
#holds for the model formula above, but keep the two in sync
covariates<-c("Naloxone_Pharmacy_Yes_Redefined",
"Naloxone_Pharmacy_No_Redefined",
"Medical_Marijuana_Redefined",
"Recreational_Marijuana_Redefined",
"GSL_Redefined",
"PDMP_Redefined",
"Medicaid_Expansion_Redefined",
"Intervention_Redefined",
"num_states_w_intervention")
#rename the variable names of the regression output so that they look nicer.
#there are 3 kinds of coefficients in the table: state effects, the policy covariates,
#and the smoothed time effects. the categories are disjoint, so we can rename with
#vectorized logical masks instead of the original 1:length() element-by-element loop
#(which is both slower and unsafe for zero-row input).
row_labels <- rownames(sensitivity_anlys_exclude_diff_states_full_table)
#covariate rows keep their names untouched
not_covariate <- !(row_labels %in% covariates)
#state effects look like "StateMassachusetts" or "StateGeorgia": strip the "State" prefix
state_rows <- not_covariate & startsWith(row_labels, "State")
row_labels[state_rows] <- substring(row_labels[state_rows], 6)
#the intercept is the baseline state's effect, so label it accordingly
row_labels[not_covariate & row_labels == "(Intercept)"] <- "Intercept/Alabama"
#smoothed time effects look like "s(Time_Period_ID):as.factor(Region)West.1":
#drop the 35-character smooth prefix and prepend a readable label
smooth_prefix <- "s(Time_Period_ID):as.factor(Region)"
smooth_rows <- not_covariate & startsWith(row_labels, smooth_prefix)
row_labels[smooth_rows] <- paste0("Smoothed Time for Region ",
                                  substring(row_labels[smooth_rows], 36))
rownames(sensitivity_anlys_exclude_diff_states_full_table) <- row_labels
#confidence intervals for the coefficients.
#hoist the model summary into a local: summary() on a gam fit is not free, and the
#original recomputed it four separate times for the same model
exclude_diff_states_summary <- summary(sensitivity_anlys_exclude_diff_states_model)
#normal-approximation 95% CI on the coefficient (log-odds) scale
sensitivity_anlys_exclude_diff_states_full_table$Coefficient_Lower_Bound <-
  sensitivity_anlys_exclude_diff_states_full_table$Coefficient_Estimate -
  1.96 * exclude_diff_states_summary$se
sensitivity_anlys_exclude_diff_states_full_table$Coefficient_Upper_Bound <-
  sensitivity_anlys_exclude_diff_states_full_table$Coefficient_Estimate +
  1.96 * exclude_diff_states_summary$se
#exponentiate the estimates and confidence bounds to move to the odds-ratio scale
sensitivity_anlys_exclude_diff_states_full_table$Odds_Ratio <-
  exp(sensitivity_anlys_exclude_diff_states_full_table$Coefficient_Estimate)
sensitivity_anlys_exclude_diff_states_full_table$Odds_Ratio_LB <-
  exp(sensitivity_anlys_exclude_diff_states_full_table$Coefficient_Lower_Bound)
sensitivity_anlys_exclude_diff_states_full_table$Odds_Ratio_UB <-
  exp(sensitivity_anlys_exclude_diff_states_full_table$Coefficient_Upper_Bound)
#store the standard error and p-value
sensitivity_anlys_exclude_diff_states_full_table$Standard_Error <- exclude_diff_states_summary$se
#there is no p-value for the smoothed time effects, so pad those rows with NA
#(assumes the parametric terms come first in coef(), which is how mgcv orders them)
sensitivity_anlys_exclude_diff_states_full_table$p_value <-
  c(exclude_diff_states_summary$p.pv,
    rep(NA, length(coef(sensitivity_anlys_exclude_diff_states_model)) -
          length(exclude_diff_states_summary$p.pv)))
head(sensitivity_anlys_exclude_diff_states_full_table)
## Coefficient_Estimate Coefficient_Lower_Bound
## Intercept/Alabama -9.91163324 -10.01759074
## Alaska 0.27649991 0.22240681
## Arizona 0.31444075 0.28711934
## Arkansas -0.39099480 -0.42951459
## California -0.16008628 -0.18609870
## Colorado 0.09582859 0.06439146
## Coefficient_Upper_Bound Odds_Ratio Odds_Ratio_LB
## Intercept/Alabama -9.8056757 4.959437e-05 4.460829e-05
## Alaska 0.3305930 1.318507e+00 1.249079e+00
## Arizona 0.3417622 1.369493e+00 1.332583e+00
## Arkansas -0.3524750 6.763837e-01 6.508249e-01
## California -0.1340739 8.520703e-01 8.301917e-01
## Colorado 0.1272657 1.100570e+00 1.066510e+00
## Odds_Ratio_UB Standard_Error p_value
## Intercept/Alabama 5.513776e-05 0.05405995 0.000000e+00
## Alaska 1.391793e+00 0.02759852 1.262165e-23
## Arizona 1.407426e+00 0.01393949 1.132387e-112
## Arkansas 7.029461e-01 0.01965296 4.499620e-88
## California 8.745255e-01 0.01327164 1.670923e-33
## Colorado 1.135719e+00 0.01603935 2.306666e-09
tail(sensitivity_anlys_exclude_diff_states_full_table)
## Coefficient_Estimate Coefficient_Lower_Bound
## Smoothed Time for Region West.4 0.2327125 0.20342625
## Smoothed Time for Region West.5 0.1811413 0.12741555
## Smoothed Time for Region West.6 0.1557749 0.08107344
## Smoothed Time for Region West.7 0.1970045 0.10945270
## Smoothed Time for Region West.8 0.3049141 0.19813490
## Smoothed Time for Region West.9 0.4192418 0.32763324
## Coefficient_Upper_Bound Odds_Ratio
## Smoothed Time for Region West.4 0.2619987 1.262019
## Smoothed Time for Region West.5 0.2348670 1.198585
## Smoothed Time for Region West.6 0.2304764 1.168563
## Smoothed Time for Region West.7 0.2845563 1.217750
## Smoothed Time for Region West.8 0.4116933 1.356508
## Smoothed Time for Region West.9 0.5108504 1.520808
## Odds_Ratio_LB Odds_Ratio_UB Standard_Error
## Smoothed Time for Region West.4 1.225595 1.299525 0.01494196
## Smoothed Time for Region West.5 1.135889 1.264741 0.02741110
## Smoothed Time for Region West.6 1.084451 1.259200 0.03811301
## Smoothed Time for Region West.7 1.115667 1.329172 0.04466929
## Smoothed Time for Region West.8 1.219127 1.509371 0.05447918
## Smoothed Time for Region West.9 1.387680 1.666708 0.04673908
## p_value
## Smoothed Time for Region West.4 NA
## Smoothed Time for Region West.5 NA
## Smoothed Time for Region West.6 NA
## Smoothed Time for Region West.7 NA
## Smoothed Time for Region West.8 NA
## Smoothed Time for Region West.9 NA
#build a presentation table restricted to the policy covariates
#locate the covariate rows within the full coefficient table
covariate_Index <- which(rownames(sensitivity_anlys_exclude_diff_states_full_table) %in% covariates)
#round the covariate rows for display
sensitivity_anlys_exclude_diff_states_covariate_table <-
  round(sensitivity_anlys_exclude_diff_states_full_table[covariate_Index, ], 5)
#cleaner display names (assigned positionally, following the covariates' row order)
rownames(sensitivity_anlys_exclude_diff_states_covariate_table) <- c(
  "Naloxone_Pharmacy_Yes",
  "Naloxone_Pharmacy_No",
  "Medical_Marijuana",
  "Recreational_Marijuana",
  "GSL",
  "PDMP",
  "Medicaid_Expansion",
  "Intervention_Redefined",
  "Number of States w DIH Prosecution")
#stack the covariates on top of the remaining (state and smooth-term) rows
sensitivity_anlys_exclude_diff_states_covariate_table <- rbind(
  sensitivity_anlys_exclude_diff_states_covariate_table,
  sensitivity_anlys_exclude_diff_states_full_table[-covariate_Index, ])
#keep only the odds-ratio-scale columns and the p-value
log_scale_cols <- c("Coefficient_Estimate", "Coefficient_Lower_Bound",
                    "Coefficient_Upper_Bound", "Standard_Error")
sensitivity_anlys_exclude_diff_states_covariate_table <-
  sensitivity_anlys_exclude_diff_states_covariate_table[
    , !(colnames(sensitivity_anlys_exclude_diff_states_covariate_table) %in% log_scale_cols)]
colnames(sensitivity_anlys_exclude_diff_states_covariate_table) <-
  c("Risk_Ratio_Estimates", "RR_95_CI_LB", "RR_95_CI_UB", "p-value")
head(sensitivity_anlys_exclude_diff_states_covariate_table, 10)
## Risk_Ratio_Estimates RR_95_CI_LB
## Naloxone_Pharmacy_Yes 9.781900e-01 9.634000e-01
## Naloxone_Pharmacy_No 1.010420e+00 9.976000e-01
## Medical_Marijuana 1.065140e+00 1.053410e+00
## Recreational_Marijuana 9.635700e-01 9.473700e-01
## GSL 1.034920e+00 1.023560e+00
## PDMP 9.808700e-01 9.697600e-01
## Medicaid_Expansion 1.105790e+00 1.092700e+00
## Intervention_Redefined 1.062540e+00 1.051400e+00
## Number of States w DIH Prosecution 1.003730e+00 1.000110e+00
## Intercept/Alabama 4.959437e-05 4.460829e-05
## RR_95_CI_UB p-value
## Naloxone_Pharmacy_Yes 9.932000e-01 0.00454
## Naloxone_Pharmacy_No 1.023410e+00 0.11160
## Medical_Marijuana 1.077010e+00 0.00000
## Recreational_Marijuana 9.800500e-01 0.00002
## GSL 1.046400e+00 0.00000
## PDMP 9.921000e-01 0.00088
## Medicaid_Expansion 1.119030e+00 0.00000
## Intervention_Redefined 1.073800e+00 0.00000
## Number of States w DIH Prosecution 1.007370e+00 0.04315
## Intercept/Alabama 5.513776e-05 0.00000
#save the table into a CSV
# write.csv(round(sensitivity_anlys_exclude_diff_states_covariate_table, 3), "./Data/coefficients_covariates_9_6_21_full_data_exclude_diff_states.csv")
Attributable Deaths
###################################### Attributable Deaths #############################
#subset to the rows (state x time) where at least one state had the intervention
attr_deaths_anlys_exclude_diff_states <- sensitivity_anlys_exclude_diff_states_data[
  which(sensitivity_anlys_exclude_diff_states_data$Intervention_Redefined > 0), ]
#probability of overdose had the intervention not occurred: remove the intervention
#effect on the logit scale, then map back with expit
prob_od_no_int_exclude_diff_states <- expit(
  -coef(sensitivity_anlys_exclude_diff_states_model)["Intervention_Redefined"] *
    attr_deaths_anlys_exclude_diff_states$Intervention_Redefined +
    logit(attr_deaths_anlys_exclude_diff_states$imputed_deaths /
            attr_deaths_anlys_exclude_diff_states$population))
#95% CI bounds for all coefficients, from the model's standard errors
coef_lb <- coef(sensitivity_anlys_exclude_diff_states_model) -
  1.96 * summary(sensitivity_anlys_exclude_diff_states_model)$se
coef_ub <- coef(sensitivity_anlys_exclude_diff_states_model) +
  1.96 * summary(sensitivity_anlys_exclude_diff_states_model)$se
#lower/upper bounds of the no-intervention overdose probability, using the bounds of
#the intervention coefficient
prob_od_no_int_LB_exclude_diff_states <- expit(
  -coef_lb[names(coef_lb) == "Intervention_Redefined"] *
    attr_deaths_anlys_exclude_diff_states$Intervention_Redefined +
    logit(attr_deaths_anlys_exclude_diff_states$imputed_deaths /
            attr_deaths_anlys_exclude_diff_states$population))
prob_od_no_int_UB_exclude_diff_states <- expit(
  -coef_ub[names(coef_ub) == "Intervention_Redefined"] *
    attr_deaths_anlys_exclude_diff_states$Intervention_Redefined +
    logit(attr_deaths_anlys_exclude_diff_states$imputed_deaths /
            attr_deaths_anlys_exclude_diff_states$population))
#estimate the number of deaths attributable to the intervention, per time period
#FIX: size the result vectors by the time periods of the SUBSET iterated below.
#the original sized them by the full data's time periods, leaving trailing NAs
#(and breaking the data.frame() construction) whenever some periods have no
#state with the intervention
attr_time_periods <- sort(unique(attr_deaths_anlys_exclude_diff_states$Time_Period_ID))
num_attr_od_UB <- num_attr_od_LB <- num_attr_od <- rep(NA, length(attr_time_periods))
index <- 1 #keep track of where to store the values in the vectors
for (time in attr_time_periods) {
  #rows belonging to this time period
  time_point_index <- which(attr_deaths_anlys_exclude_diff_states$Time_Period_ID == time)
  #attributable deaths = observed deaths - estimated deaths had intervention not occurred
  num_attr_od[index] <- sum(attr_deaths_anlys_exclude_diff_states$imputed_deaths[time_point_index]
                            - prob_od_no_int_exclude_diff_states[time_point_index] *
                              attr_deaths_anlys_exclude_diff_states$population[time_point_index])
  #lower and upper bounds of the attributable-death estimate
  num_attr_od_LB[index] <- sum(attr_deaths_anlys_exclude_diff_states$imputed_deaths[time_point_index]
                               - prob_od_no_int_LB_exclude_diff_states[time_point_index] *
                                 attr_deaths_anlys_exclude_diff_states$population[time_point_index])
  num_attr_od_UB[index] <- sum(attr_deaths_anlys_exclude_diff_states$imputed_deaths[time_point_index]
                               - prob_od_no_int_UB_exclude_diff_states[time_point_index] *
                                 attr_deaths_anlys_exclude_diff_states$population[time_point_index])
  index <- index + 1
}
#NOTE(review): Time_Start assumes Time_Period_Start is one-to-one with Time_Period_ID,
#so that sorting both independently keeps them aligned — confirm upstream
num_attr_od_exclude_diff_states <- data.frame(
  "Time_Period_ID" = attr_time_periods,
  "Time_Start" = sort(unique(attr_deaths_anlys_exclude_diff_states$Time_Period_Start)),
  "Num_Attr_Deaths" = num_attr_od,
  "Num_Attr_Deaths_LB" = num_attr_od_LB,
  "Num_Attr_Deaths_UB" = num_attr_od_UB)
## [1] 32470.68
#aggregate the excess deaths (and their CI bounds) to yearly totals
yearly_num_Attr_Deaths_exclude_diff_states <- num_attr_od_exclude_diff_states %>%
  group_by(year = year(Time_Start)) %>%
  summarise(deaths = sum(Num_Attr_Deaths),
            death_lb = sum(Num_Attr_Deaths_LB),
            death_ub = sum(Num_Attr_Deaths_UB))
#distribution of the yearly excess-death estimates
summary(yearly_num_Attr_Deaths_exclude_diff_states$deaths)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 29.27 739.61 1381.41 1623.53 2259.55 3613.31
Sensitivity Analysis 3: Divide Unaccounted-For Unintentional Deaths Equally Among Missing Months
Analysis
### Sensitivity Analysis 3: Divide Unaccountable Deaths Equally Among Missing Months #####
#read the monthly unintentional overdose death counts (ages 17+), 1999-2019
od_data <- read.csv("./Data/unintentional_od_1999_2019_age_17_up.txt",
                    sep = "\t", stringsAsFactors = FALSE)
#Deaths arrives as text; coerce to numeric (non-numeric entries become NA)
od_data$Deaths <- as.numeric(od_data$Deaths)
#rows with a missing Year are data-set description/footer rows -- drop them
od_data <- subset(od_data, !is.na(Year))
tail(od_data)
## Notes State State.Code Year Year.Code Month Month.Code Deaths
## 12595 Wyoming 56 2019 2019 Jul., 2019 2019/07 NA
## 12596 Wyoming 56 2019 2019 Aug., 2019 2019/08 NA
## 12597 Wyoming 56 2019 2019 Sep., 2019 2019/09 NA
## 12598 Wyoming 56 2019 2019 Oct., 2019 2019/10 NA
## 12599 Wyoming 56 2019 2019 Nov., 2019 2019/11 NA
## 12600 Wyoming 56 2019 2019 Dec., 2019 2019/12 NA
## Population Crude.Rate
## 12595 Not Applicable Not Applicable
## 12596 Not Applicable Not Applicable
## 12597 Not Applicable Not Applicable
## 12598 Not Applicable Not Applicable
## 12599 Not Applicable Not Applicable
## 12600 Not Applicable Not Applicable
#count how many monthly death records are missing and will need to be imputed
sum(is.na(od_data$Deaths))
## [1] 3148
#set up the overdose data to impute the missing data
#parse the month labels (e.g. "Jul., 2019") into Date objects
od_data$Date<-mdy(od_data$Month)
#check how many distinct states are present (DC is handled just below)
length(unique(od_data$State))
## [1] 50
#DC is being counted in this, but we do not have data on prosecutions. Remove these rows
od_data <- subset(od_data, State != "District of Columbia")
#read the yearly state totals, used below to fill in the missing monthly counts
od_year <- read.csv("./Data/unintentional_od_yearly_1999_2019_age_17_up.txt",
                    sep = "\t", stringsAsFactors = FALSE)
#Deaths arrives as text; coerce to numeric (non-numeric entries become NA)
od_year$Deaths <- as.numeric(od_year$Deaths)
head(od_year, 50)
## Notes State State.Code Year Year.Code Deaths Population Crude.Rate
## 1 Alabama 1 1999 1999 115 3308855 3.5
## 2 Alabama 1 2000 2000 126 3323678 3.8
## 3 Alabama 1 2001 2001 163 3347225 4.9
## 4 Alabama 1 2002 2002 167 3363499 5.0
## 5 Alabama 1 2003 2003 150 3390408 4.4
## 6 Alabama 1 2004 2004 220 3417067 6.4
## 7 Alabama 1 2005 2005 215 3452576 6.2
## 8 Alabama 1 2006 2006 308 3502183 8.8
## 9 Alabama 1 2007 2007 416 3540544 11.7
## 10 Alabama 1 2008 2008 482 3583279 13.5
## 11 Alabama 1 2009 2009 535 3623746 14.8
## 12 Alabama 1 2010 2010 462 3647277 12.7
## 13 Alabama 1 2011 2011 484 3675597 13.2
## 14 Alabama 1 2012 2012 473 3697617 12.8
## 15 Alabama 1 2013 2013 520 3722241 14.0
## 16 Alabama 1 2014 2014 637 3741806 17.0
## 17 Alabama 1 2015 2015 653 3755483 17.4
## 18 Alabama 1 2016 2016 680 3766477 18.1
## 19 Alabama 1 2017 2017 750 3779274 19.8
## 20 Alabama 1 2018 2018 683 3798031 18.0
## 21 Alabama 1 2019 2019 695 3814879 18.2
## 22 Total Alabama 1 NA NA 8934 75251742 11.9
## 23 Alaska 2 1999 1999 27 433357 6.2
## 24 Alaska 2 2000 2000 35 436215 8.0
## 25 Alaska 2 2001 2001 52 444943 11.7
## 26 Alaska 2 2002 2002 69 453855 15.2
## 27 Alaska 2 2003 2003 69 461571 14.9
## 28 Alaska 2 2004 2004 62 472951 13.1
## 29 Alaska 2 2005 2005 61 481642 12.7
## 30 Alaska 2 2006 2006 66 489722 13.5
## 31 Alaska 2 2007 2007 63 495956 12.7
## 32 Alaska 2 2008 2008 88 504331 17.4
## 33 Alaska 2 2009 2009 99 512544 19.3
## 34 Alaska 2 2010 2010 74 522853 14.2
## 35 Alaska 2 2011 2011 94 534277 17.6
## 36 Alaska 2 2012 2012 112 544349 20.6
## 37 Alaska 2 2013 2013 92 547000 16.8
## 38 Alaska 2 2014 2014 103 550189 18.7
## 39 Alaska 2 2015 2015 105 552166 19.0
## 40 Alaska 2 2016 2016 102 554567 18.4
## 41 Alaska 2 2017 2017 123 554867 22.2
## 42 Alaska 2 2018 2018 94 553622 17.0
## 43 Alaska 2 2019 2019 112 551562 20.3
## 44 Total Alaska 2 NA NA 1702 10652539 16.0
## 45 Arizona 4 1999 1999 360 3691428 9.8
## 46 Arizona 4 2000 2000 397 3763685 10.5
## 47 Arizona 4 2001 2001 418 3874462 10.8
## 48 Arizona 4 2002 2002 457 3968317 11.5
## 49 Arizona 4 2003 2003 507 4056693 12.5
## 50 Arizona 4 2004 2004 537 4167950 12.9
#see how many states have missing yearly entries and use the totals to impute the missing yearly values.
#sum_na: per-state count of missing yearly death values
sum_na <- od_year %>% group_by(State) %>% summarise(sum(is.na(Deaths)))
#cross-tabulate states by how many yearly values they are missing
table(sum_na)
## sum(is.na(Deaths))
## State 0 4 5 72
## 0 0 0 1
## Alabama 1 0 0 0
## Alaska 1 0 0 0
## Arizona 1 0 0 0
## Arkansas 1 0 0 0
## California 1 0 0 0
## Colorado 1 0 0 0
## Connecticut 1 0 0 0
## Delaware 1 0 0 0
## Florida 1 0 0 0
## Georgia 1 0 0 0
## Hawaii 1 0 0 0
## Idaho 1 0 0 0
## Illinois 1 0 0 0
## Indiana 1 0 0 0
## Iowa 1 0 0 0
## Kansas 1 0 0 0
## Kentucky 1 0 0 0
## Louisiana 1 0 0 0
## Maine 1 0 0 0
## Maryland 1 0 0 0
## Massachusetts 1 0 0 0
## Michigan 1 0 0 0
## Minnesota 1 0 0 0
## Mississippi 1 0 0 0
## Missouri 1 0 0 0
## Montana 1 0 0 0
## Nebraska 1 0 0 0
## Nevada 1 0 0 0
## New Hampshire 1 0 0 0
## New Jersey 1 0 0 0
## New Mexico 1 0 0 0
## New York 1 0 0 0
## North Carolina 1 0 0 0
## North Dakota 0 1 0 0
## Ohio 1 0 0 0
## Oklahoma 1 0 0 0
## Oregon 1 0 0 0
## Pennsylvania 1 0 0 0
## Rhode Island 0 1 0 0
## South Carolina 1 0 0 0
## South Dakota 0 0 1 0
## Tennessee 1 0 0 0
## Texas 1 0 0 0
## Utah 1 0 0 0
## Vermont 1 0 0 0
## Virginia 1 0 0 0
## Washington 1 0 0 0
## West Virginia 1 0 0 0
## Wisconsin 1 0 0 0
## Wyoming 1 0 0 0
#the 3 states with missing yearly values are: North Dakota, South Dakota, and Rhode Island
#NOTE(review): the original repeated the same imputation verbatim for each state;
#collapsed into one loop -- same arithmetic, applied per state
#for each state: spread the deaths unaccounted for (the state's "Total" row minus
#the sum of its non-missing yearly counts) evenly across its missing years
for (st in c("North Dakota", "South Dakota", "Rhode Island")) {
  state_rows <- od_year$State == st
  missing_rows <- state_rows & is.na(od_year$Deaths)
  #the export carries one "Total" row per state holding the 1999-2019 sum
  state_total <- od_year$Deaths[state_rows & od_year$Notes == "Total"]
  accounted <- sum(od_year$Deaths[state_rows & od_year$Notes != "Total"], na.rm = TRUE)
  od_year$Deaths[missing_rows] <- (state_total - accounted) / sum(missing_rows)
}
#drop the description/total rows (they have no Year)
od_year <- od_year[!is.na(od_year$Year),]
tail(od_year)
## Notes State State.Code Year Year.Code Deaths Population Crude.Rate
## 1094 Wyoming 56 2014 2014 87 445830 19.5
## 1095 Wyoming 56 2015 2015 79 447212 17.7
## 1096 Wyoming 56 2016 2016 79 446600 17.7
## 1097 Wyoming 56 2017 2017 53 442832 12.0
## 1098 Wyoming 56 2018 2018 54 442962 12.2
## 1099 Wyoming 56 2019 2019 68 445025 15.3
#column to hold the monthly counts after filling in the missing months
od_data$imputed_vals <- rep(NA, nrow(od_data))
startYear <- 1999
for (state in unique(od_data$State)) {
  #monthly death counts for this state, in chronological order
  state_deaths <- od_data$Deaths[od_data$State == state]
  for (yr in startYear:2019) {
    #positions of this year's 12 months within the state's monthly series
    #(assumes exactly 12 ordered rows per state-year -- TODO confirm upstream)
    month_positions <- (1:12) + 12 * (yr - startYear)
    missing_months <- which(is.na(state_deaths[month_positions]))
    if (length(missing_months) > 0) {
      #deaths already accounted for by this year's non-missing months
      accounted_deaths <- sum(state_deaths[month_positions], na.rm = TRUE)
      #deaths the yearly total reports but the monthly data does not show
      unaccounted_deaths <- od_year$Deaths[od_year$State == state & od_year$Year == yr] - accounted_deaths
      #divide the unaccounted deaths evenly across the missing months
      state_deaths[month_positions][missing_months] <- unaccounted_deaths / length(missing_months)
    }
  }
  #write the filled-in monthly series back for this state
  od_data$imputed_vals[od_data$State == state] <- state_deaths
}
#group into 6 month time periods and compute the total number of deaths in each period
od_data_grouped_data <- od_data %>%
  mutate(Time_Period_Start = lubridate::floor_date(Date, "6 months")) %>%
  group_by(State, Time_Period_Start) %>%
  summarise(sum_deaths = sum(imputed_vals, na.rm = TRUE))
#restrict the dataset to the 2000-2019 study window
od_data_grouped_data <- od_data_grouped_data[year(od_data_grouped_data$Time_Period_Start) > 1999 &
                                               year(od_data_grouped_data$Time_Period_Start) < 2020, ]
#create a new dataset for the analysis using columns from the main analysis data
#NOTE(review): the original attached od_data_grouped_data$sum_deaths positionally,
#which silently misaligns if the two data frames ever sort differently;
#join explicitly on State and period start instead
sensitivity_anlys_imputed_od_wo_interp_data <- main_analysis_data %>%
  ungroup() %>%
  left_join(od_data_grouped_data, by = c("State", "Time_Period_Start")) %>%
  mutate(imputed_deaths_no_interp = sum_deaths,
         num_alive_no_interp = population - imputed_deaths_no_interp) %>%
  #remove the main-analysis outcome (and the joined helper column) to avoid confusion
  dplyr::select(-c(imputed_deaths, num_alive, sum_deaths))
#compute the number of states with the intervention in each 6-month period
#NOTE(review): the model formula uses num_states_w_intervention, but the original
#created num_states_with_intervention, so the freshly computed column was never
#used by the model -- name fixed to match the formula
sensitivity_anlys_imputed_od_wo_interp_data <- sensitivity_anlys_imputed_od_wo_interp_data %>%
  group_by(Time_Period_Start) %>%
  mutate(num_states_w_intervention = sum(Intervention_Redefined)) %>%
  ungroup()
#run the model for analysis
#binomial GAM on aggregated counts: the outcome is cbind(deaths, survivors)
#per state-period, with state fixed effects, region-specific smooth time
#trends (cubic regression splines via bs = "cr"), the co-occurring policy
#covariates, the DIH-prosecution intervention indicator, and the number of
#states with the intervention at each period
sensitivity_anlys_imputed_od_wo_interp <- gam(cbind(round(imputed_deaths_no_interp),
round(num_alive_no_interp))~ State +
s(Time_Period_ID, bs = "cr",
by = as.factor(Region)) +
Naloxone_Pharmacy_Yes_Redefined +
Naloxone_Pharmacy_No_Redefined +
Medical_Marijuana_Redefined +
Recreational_Marijuana_Redefined +
GSL_Redefined +
PDMP_Redefined +
Medicaid_Expansion_Redefined +
Intervention_Redefined +
num_states_w_intervention,
data = sensitivity_anlys_imputed_od_wo_interp_data,
family = "binomial")
#render the fitted model as an HTML regression table
stargazer(sensitivity_anlys_imputed_od_wo_interp, type = "html", dep.var.labels = "Unintentional Overdose Death")
|
|
|
|
Dependent variable:
|
|
|
|
|
|
Unintentional Overdose Death
|
|
|
|
StateAlaska
|
0.277***
|
|
|
(0.028)
|
|
|
|
|
StateArizona
|
0.315***
|
|
|
(0.014)
|
|
|
|
|
StateArkansas
|
-0.390***
|
|
|
(0.020)
|
|
|
|
|
StateCalifornia
|
-0.159***
|
|
|
(0.013)
|
|
|
|
|
StateColorado
|
0.097***
|
|
|
(0.016)
|
|
|
|
|
StateConnecticut
|
0.189***
|
|
|
(0.016)
|
|
|
|
|
StateDelaware
|
0.430***
|
|
|
(0.022)
|
|
|
|
|
StateFlorida
|
0.253***
|
|
|
(0.012)
|
|
|
|
|
StateGeorgia
|
-0.071***
|
|
|
(0.013)
|
|
|
|
|
StateHawaii
|
-0.226***
|
|
|
(0.026)
|
|
|
|
|
StateIdaho
|
-0.136***
|
|
|
(0.024)
|
|
|
|
|
StateIllinois
|
-0.016
|
|
|
(0.013)
|
|
|
|
|
StateIndiana
|
0.088***
|
|
|
(0.014)
|
|
|
|
|
StateIowa
|
-0.740***
|
|
|
(0.021)
|
|
|
|
|
StateKansas
|
-0.328***
|
|
|
(0.019)
|
|
|
|
|
StateKentucky
|
0.641***
|
|
|
(0.014)
|
|
|
|
|
StateLouisiana
|
0.294***
|
|
|
(0.014)
|
|
|
|
|
StateMaine
|
0.145***
|
|
|
(0.022)
|
|
|
|
|
StateMaryland
|
-1.066***
|
|
|
(0.019)
|
|
|
|
|
StateMassachusetts
|
0.208***
|
|
|
(0.014)
|
|
|
|
|
StateMichigan
|
-0.019
|
|
|
(0.014)
|
|
|
|
|
StateMinnesota
|
-0.618***
|
|
|
(0.017)
|
|
|
|
|
StateMississippi
|
-0.100***
|
|
|
(0.018)
|
|
|
|
|
StateMissouri
|
0.194***
|
|
|
(0.015)
|
|
|
|
|
StateMontana
|
-0.360***
|
|
|
(0.029)
|
|
|
|
|
StateNebraska
|
-0.883***
|
|
|
(0.029)
|
|
|
|
|
StateNevada
|
0.439***
|
|
|
(0.017)
|
|
|
|
|
StateNew Hampshire
|
0.258***
|
|
|
(0.020)
|
|
|
|
|
StateNew Jersey
|
0.107***
|
|
|
(0.013)
|
|
|
|
|
StateNew Mexico
|
0.631***
|
|
|
(0.017)
|
|
|
|
|
StateNew York
|
-0.238***
|
|
|
(0.013)
|
|
|
|
|
StateNorth Carolina
|
0.177***
|
|
|
(0.013)
|
|
|
|
|
StateNorth Dakota
|
-1.053***
|
|
|
(0.045)
|
|
|
|
|
StateOhio
|
0.455***
|
|
|
(0.012)
|
|
|
|
|
StateOklahoma
|
0.385***
|
|
|
(0.015)
|
|
|
|
|
StateOregon
|
-0.194***
|
|
|
(0.018)
|
|
|
|
|
StatePennsylvania
|
0.436***
|
|
|
(0.012)
|
|
|
|
|
StateRhode Island
|
0.238***
|
|
|
(0.022)
|
|
|
|
|
StateSouth Carolina
|
0.221***
|
|
|
(0.015)
|
|
|
|
|
StateSouth Dakota
|
-0.941***
|
|
|
(0.042)
|
|
|
|
|
StateTennessee
|
0.436***
|
|
|
(0.013)
|
|
|
|
|
StateTexas
|
-0.204***
|
|
|
(0.012)
|
|
|
|
|
StateUtah
|
0.072***
|
|
|
(0.018)
|
|
|
|
|
StateVermont
|
-0.167***
|
|
|
(0.031)
|
|
|
|
|
StateVirginia
|
-0.113***
|
|
|
(0.014)
|
|
|
|
|
StateWashington
|
0.078***
|
|
|
(0.015)
|
|
|
|
|
StateWest Virginia
|
0.876***
|
|
|
(0.015)
|
|
|
|
|
StateWisconsin
|
-0.037**
|
|
|
(0.015)
|
|
|
|
|
StateWyoming
|
0.024
|
|
|
(0.034)
|
|
|
|
|
Naloxone_Pharmacy_Yes_Redefined
|
-0.024***
|
|
|
(0.008)
|
|
|
|
|
Naloxone_Pharmacy_No_Redefined
|
0.009
|
|
|
(0.007)
|
|
|
|
|
Medical_Marijuana_Redefined
|
0.063***
|
|
|
(0.006)
|
|
|
|
|
Recreational_Marijuana_Redefined
|
-0.037***
|
|
|
(0.009)
|
|
|
|
|
GSL_Redefined
|
0.034***
|
|
|
(0.006)
|
|
|
|
|
PDMP_Redefined
|
-0.019***
|
|
|
(0.006)
|
|
|
|
|
Medicaid_Expansion_Redefined
|
0.099***
|
|
|
(0.006)
|
|
|
|
|
Intervention_Redefined
|
0.062***
|
|
|
(0.005)
|
|
|
|
|
num_states_w_intervention
|
0.004**
|
|
|
(0.002)
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.9
|
|
|
|
|
|
|
|
|
Constant
|
-9.911***
|
|
|
(0.054)
|
|
|
|
|
|
|
Observations
|
2,000
|
|
Adjusted R2
|
0.911
|
|
Log Likelihood
|
-16,741.690
|
|
UBRE
|
8.831
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
# exp(coef(sensitivity_anlys_imputed_od_wo_interp)["Intervention_Redefined"])
Compile Results
########## Sensitivity Analysis 3: Make Data Frame of Results and 95% CI #############
#store the fitted coefficients (state effects, covariates, smooth-term
#coefficients) into a one-column data frame; the column is renamed below
sensitivity_anlys_wo_interp_full_table<-data.frame(coef(sensitivity_anlys_imputed_od_wo_interp))
#check to see how the table looks
head(sensitivity_anlys_wo_interp_full_table)
## coef.sensitivity_anlys_imputed_od_wo_interp.
## (Intercept) -9.9109753
## StateAlaska 0.2774932
## StateArizona 0.3148168
## StateArkansas -0.3899413
## StateCalifornia -0.1588927
## StateColorado 0.0967274
#give the single column a descriptive name
colnames(sensitivity_anlys_wo_interp_full_table) <- "Coefficient_Estimate"
#model terms treated as covariates of interest (policy variables plus the intervention terms)
covariates <- c("Naloxone_Pharmacy_Yes_Redefined", "Naloxone_Pharmacy_No_Redefined",
                "Medical_Marijuana_Redefined", "Recreational_Marijuana_Redefined",
                "GSL_Redefined", "PDMP_Redefined", "Medicaid_Expansion_Redefined",
                "Intervention_Redefined", "num_states_w_intervention")
#rename the coefficient row names so the regression output reads cleanly;
#there are 3 kinds of terms: state fixed effects, the covariates (left as-is),
#and the smoothed time effects
#NOTE(review): replaced the original element-wise for loop (repeated substr
#calls per row) with vectorized string operations -- identical renaming
term_names <- rownames(sensitivity_anlys_wo_interp_full_table)
is_covariate <- term_names %in% covariates
#state effects look like "StateMassachusetts"; strip the "State" prefix
is_state_effect <- !is_covariate & startsWith(term_names, "State")
term_names[is_state_effect] <- substr(term_names[is_state_effect], 6,
                                      nchar(term_names[is_state_effect]))
#the intercept absorbs the baseline state (Alabama); label it accordingly
term_names[!is_covariate & term_names == "(Intercept)"] <- "Intercept/Alabama"
#smoothed time effects look like "s(Time_Period_ID):as.factor(Region)West.3";
#replace the 35-character machine prefix with a readable label
is_smooth_effect <- !is_covariate & startsWith(term_names, "s(Time_Period_ID):as.factor(Region)")
term_names[is_smooth_effect] <- paste("Smoothed Time for Region ",
                                      substr(term_names[is_smooth_effect], 36,
                                             nchar(term_names[is_smooth_effect])),
                                      sep = "")
rownames(sensitivity_anlys_wo_interp_full_table) <- term_names
#confidence intervals for the coefficients
#NOTE(review): the original called summary() on the fitted gam four separate
#times; it is not free to recompute, so call it once and reuse the result
wo_interp_summary <- summary(sensitivity_anlys_imputed_od_wo_interp)
#95% Wald confidence intervals on the coefficient (log-odds) scale
sensitivity_anlys_wo_interp_full_table$Coefficient_Lower_Bound <- sensitivity_anlys_wo_interp_full_table$Coefficient_Estimate - 1.96 * wo_interp_summary$se
sensitivity_anlys_wo_interp_full_table$Coefficient_Upper_Bound <- sensitivity_anlys_wo_interp_full_table$Coefficient_Estimate + 1.96 * wo_interp_summary$se
#compute the estimates and confidence intervals on the odds ratio scale
sensitivity_anlys_wo_interp_full_table$Odds_Ratio <- exp(sensitivity_anlys_wo_interp_full_table$Coefficient_Estimate)
sensitivity_anlys_wo_interp_full_table$Odds_Ratio_LB <- exp(sensitivity_anlys_wo_interp_full_table$Coefficient_Lower_Bound)
sensitivity_anlys_wo_interp_full_table$Odds_Ratio_UB <- exp(sensitivity_anlys_wo_interp_full_table$Coefficient_Upper_Bound)
#store the standard errors
sensitivity_anlys_wo_interp_full_table$Standard_Error <- wo_interp_summary$se
#parametric p-values exist only for the non-smooth terms; pad the smooth-term rows with NA
sensitivity_anlys_wo_interp_full_table$p_value <- c(wo_interp_summary$p.pv,
                                                    rep(NA, length(coef(sensitivity_anlys_imputed_od_wo_interp)) -
                                                          length(wo_interp_summary$p.pv)))
head(sensitivity_anlys_wo_interp_full_table)
## Coefficient_Estimate Coefficient_Lower_Bound
## Intercept/Alabama -9.9109753 -10.01690973
## Alaska 0.2774932 0.22340459
## Arizona 0.3148168 0.28749354
## Arkansas -0.3899413 -0.42845827
## California -0.1588927 -0.18490125
## Colorado 0.0967274 0.06529704
## Coefficient_Upper_Bound Odds_Ratio Odds_Ratio_LB
## Intercept/Alabama -9.8050408 4.962701e-05 4.463868e-05
## Alaska 0.3315819 1.319817e+00 1.250326e+00
## Arizona 0.3421401 1.370008e+00 1.333082e+00
## Arkansas -0.3514244 6.770966e-01 6.515128e-01
## California -0.1328841 8.530879e-01 8.311864e-01
## Colorado 0.1281578 1.101560e+00 1.067476e+00
## Odds_Ratio_UB Standard_Error p_value
## Intercept/Alabama 5.517278e-05 0.05404819 0.000000e+00
## Alaska 1.393170e+00 0.02759624 8.690456e-24
## Arizona 1.407958e+00 0.01394046 6.372831e-113
## Arkansas 7.036851e-01 0.01965150 1.271106e-87
## California 8.755665e-01 0.01326967 4.855355e-33
## Colorado 1.136732e+00 0.01603590 1.620132e-09
tail(sensitivity_anlys_wo_interp_full_table)
## Coefficient_Estimate Coefficient_Lower_Bound
## Smoothed Time for Region West.4 0.2351076 0.20582369
## Smoothed Time for Region West.5 0.1834126 0.12970208
## Smoothed Time for Region West.6 0.1610046 0.08631049
## Smoothed Time for Region West.7 0.2036279 0.11614240
## Smoothed Time for Region West.8 0.3082962 0.20154232
## Smoothed Time for Region West.9 0.4224613 0.33093139
## Coefficient_Upper_Bound Odds_Ratio
## Smoothed Time for Region West.4 0.2643916 1.265045
## Smoothed Time for Region West.5 0.2371231 1.201310
## Smoothed Time for Region West.6 0.2356987 1.174690
## Smoothed Time for Region West.7 0.2911134 1.225842
## Smoothed Time for Region West.8 0.4150502 1.361104
## Smoothed Time for Region West.9 0.5139912 1.525712
## Odds_Ratio_LB Odds_Ratio_UB Standard_Error
## Smoothed Time for Region West.4 1.228537 1.302638 0.01494079
## Smoothed Time for Region West.5 1.138489 1.267597 0.02740332
## Smoothed Time for Region West.6 1.090145 1.265793 0.03810924
## Smoothed Time for Region West.7 1.123156 1.337916 0.04463545
## Smoothed Time for Region West.8 1.223288 1.514447 0.05446629
## Smoothed Time for Region West.9 1.392264 1.671951 0.04669894
## p_value
## Smoothed Time for Region West.4 NA
## Smoothed Time for Region West.5 NA
## Smoothed Time for Region West.6 NA
## Smoothed Time for Region West.7 NA
## Smoothed Time for Region West.8 NA
## Smoothed Time for Region West.9 NA
#export a table with just the covariates
#locate the covariate rows within the full coefficient table
covariate_Index <- which(rownames(sensitivity_anlys_wo_interp_full_table) %in% covariates)
#round the covariate rows for display
sensitivity_anlys_wo_interp_covariate_table <- round(sensitivity_anlys_wo_interp_full_table[covariate_Index, ], 5)
#cleaner row labels for the covariates
rownames(sensitivity_anlys_wo_interp_covariate_table) <- c("Naloxone_Pharmacy_Yes",
                                                           "Naloxone_Pharmacy_No",
                                                           "Medical_Marijuana",
                                                           "Recreational_Marijuana",
                                                           "GSL",
                                                           "PDMP",
                                                           "Medicaid_Expansion",
                                                           "Intervention_Redefined",
                                                           "Number of States with Intervention")
#put the covariates on top, followed by the remaining (state / smooth-time) rows
sensitivity_anlys_wo_interp_covariate_table <- rbind(sensitivity_anlys_wo_interp_covariate_table,
                                                     sensitivity_anlys_wo_interp_full_table[-covariate_Index, ])
#keep only the odds-ratio-scale columns and the p-value
keep_cols <- !(colnames(sensitivity_anlys_wo_interp_covariate_table) %in%
                 c("Coefficient_Estimate", "Coefficient_Lower_Bound",
                   "Coefficient_Upper_Bound", "Standard_Error"))
sensitivity_anlys_wo_interp_covariate_table <- sensitivity_anlys_wo_interp_covariate_table[, keep_cols]
colnames(sensitivity_anlys_wo_interp_covariate_table) <- c("Risk_Ratio_Estimates", "RR_95_CI_LB", "RR_95_CI_UB", "p-value")
head(sensitivity_anlys_wo_interp_covariate_table, 10)
## Risk_Ratio_Estimates RR_95_CI_LB
## Naloxone_Pharmacy_Yes 9.763200e-01 9.615900e-01
## Naloxone_Pharmacy_No 1.008580e+00 9.957900e-01
## Medical_Marijuana 1.064830e+00 1.053090e+00
## Recreational_Marijuana 9.636800e-01 9.475200e-01
## GSL 1.034510e+00 1.023170e+00
## PDMP 9.807700e-01 9.696700e-01
## Medicaid_Expansion 1.103950e+00 1.090910e+00
## Intervention_Redefined 1.063980e+00 1.052840e+00
## Number of States with Intervention 1.003700e+00 1.000080e+00
## Intercept/Alabama 4.962701e-05 4.463868e-05
## RR_95_CI_UB p-value
## Naloxone_Pharmacy_Yes 9.912800e-01 0.00200
## Naloxone_Pharmacy_No 1.021520e+00 0.18940
## Medical_Marijuana 1.076690e+00 0.00000
## Recreational_Marijuana 9.801100e-01 0.00002
## GSL 1.045970e+00 0.00000
## PDMP 9.919900e-01 0.00082
## Medicaid_Expansion 1.117130e+00 0.00000
## Intervention_Redefined 1.075240e+00 0.00000
## Number of States with Intervention 1.007330e+00 0.04506
## Intercept/Alabama 5.517278e-05 0.00000
#save the table into a CSV
# write.csv(round(sensitivity_anlys_wo_interp_covariate_table, 3), "./Data/coefficients_covariates_9_6_21_wo_interp.csv")
Attributable Deaths
################ Sensitivity Analysis 3: Number of Attributable Deaths #############
#first, we subset the data so that we only focus on the time points for which at least one state had the intervention
attr_deaths_anlys_wo_interp<-sensitivity_anlys_imputed_od_wo_interp_data[which(sensitivity_anlys_imputed_od_wo_interp_data$Intervention_Redefined>0),]
#counterfactual probability of overdose death had the intervention not occurred:
#subtract the intervention term from the fitted logit, then back-transform
prob_od_no_int_wo_interpolation<-expit(-coef(sensitivity_anlys_imputed_od_wo_interp)["Intervention_Redefined"]*attr_deaths_anlys_wo_interp$Intervention_Redefined
                                       + logit(attr_deaths_anlys_wo_interp$imputed_deaths_no_interp/attr_deaths_anlys_wo_interp$population))
#95% Wald CI bounds for all coefficients
#NOTE(review): summary() on a fitted gam was called twice here; compute it once and reuse
wo_interp_model_summary <- summary(sensitivity_anlys_imputed_od_wo_interp)
coef_lb <- coef(sensitivity_anlys_imputed_od_wo_interp) - 1.96 * wo_interp_model_summary$se
coef_ub <- coef(sensitivity_anlys_imputed_od_wo_interp) + 1.96 * wo_interp_model_summary$se
#lower and upper bounds of the counterfactual probability, using the CI bounds
#of the intervention coefficient
prob_od_no_int_LB_wo_interp<-expit(-coef_lb[names(coef_lb) == "Intervention_Redefined"]*attr_deaths_anlys_wo_interp$Intervention_Redefined
                                   + logit(attr_deaths_anlys_wo_interp$imputed_deaths_no_interp/attr_deaths_anlys_wo_interp$population))
prob_od_no_int_UB_wo_interp<-expit(-coef_ub[names(coef_ub) == "Intervention_Redefined"]*attr_deaths_anlys_wo_interp$Intervention_Redefined
                                   + logit(attr_deaths_anlys_wo_interp$imputed_deaths_no_interp/attr_deaths_anlys_wo_interp$population))
#estimate, per time period, the number of deaths attributable to the intervention
#NOTE(review): the original sized these vectors by the time periods of the FULL
#sensitivity dataset but looped over the intervention-restricted subset; size them
#from the subset actually iterated so the lengths always agree (no trailing NA slots)
time_periods_wo_interp <- sort(unique(attr_deaths_anlys_wo_interp$Time_Period_ID))
num_attr_od_UB <- num_attr_od_LB <- num_attr_od <- rep(NA, length(time_periods_wo_interp))
for (index in seq_along(time_periods_wo_interp)) {
  #rows belonging to this time period
  time_point_index <- which(attr_deaths_anlys_wo_interp$Time_Period_ID == time_periods_wo_interp[index])
  #attributable deaths = observed deaths - expected deaths under the counterfactual probability
  num_attr_od[index] <- sum(attr_deaths_anlys_wo_interp$imputed_deaths_no_interp[time_point_index]
                            - prob_od_no_int_wo_interpolation[time_point_index] * attr_deaths_anlys_wo_interp$population[time_point_index])
  #bounds induced by the CI of the intervention coefficient
  num_attr_od_LB[index] <- sum(attr_deaths_anlys_wo_interp$imputed_deaths_no_interp[time_point_index]
                               - prob_od_no_int_LB_wo_interp[time_point_index] * attr_deaths_anlys_wo_interp$population[time_point_index])
  num_attr_od_UB[index] <- sum(attr_deaths_anlys_wo_interp$imputed_deaths_no_interp[time_point_index]
                               - prob_od_no_int_UB_wo_interp[time_point_index] * attr_deaths_anlys_wo_interp$population[time_point_index])
}
#assemble the per-period results
#(assumes Time_Period_ID and Time_Period_Start sort in the same order -- TODO confirm)
num_attr_od_wo_interpolation <- data.frame("Time_Period_ID" = time_periods_wo_interp,
                                           "Time_Start" = sort(unique(attr_deaths_anlys_wo_interp$Time_Period_Start)),
                                           "Num_Attr_Deaths" = num_attr_od,
                                           "Num_Attr_Deaths_LB" = num_attr_od_LB,
                                           "Num_Attr_Deaths_UB" = num_attr_od_UB)
#sum up the total number of excess deaths attributable to the intervention
sum(num_attr_od_wo_interpolation$Num_Attr_Deaths)
## [1] 33215.68
#aggregate the excess deaths (and their CI bounds) to yearly totals
yearly_num_Attr_Deaths_wo_interpolation <- num_attr_od_wo_interpolation %>%
  group_by(year = year(Time_Start)) %>%
  summarise(deaths = sum(Num_Attr_Deaths),
            death_lb = sum(Num_Attr_Deaths_LB),
            death_ub = sum(Num_Attr_Deaths_UB))
#distribution of the yearly excess-death estimates
summary(yearly_num_Attr_Deaths_wo_interpolation$deaths)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 29.91 755.56 1413.82 1660.78 2313.50 3696.16
Sensitivity Analysis 4: Effect of DIH Prosecutions on Unintentional Overdose Deaths, Assuming Effect Lasts for Two Years
Analysis
########### Sensitivity Analysis 4: Two Year Intervention Effect ######################
#create a plot for each state to see how many prosecution media alerts there are per 6 month period
#read in the prosecution media alert data
prosecution_data <- read.csv("./Data/dih_prosecutions_9_6_21.csv")
#data cleaning: parse the charge date, normalize two inconsistently entered
#state names, and drop rows with no usable date or state
prosecution_data <- prosecution_data %>%
  mutate(Date = as.Date(Date.charged, "%m/%d/%Y")) %>%
  mutate(State = ifelse(State.Filed == "pennsylvania", "Pennsylvania", State.Filed),
         State = ifelse(State.Filed == "Virginia ", "Virginia", State)) %>%
  filter(!is.na(Date),
         State.Filed != "No Info",
         State.Filed != "No info",
         State.Filed != "No Info ",
         State != "")
#manual correction found by checking the linked article: a mistyped charge year
prosecution_data$Date[prosecution_data$Date == "2026-08-01"] <- as.Date("2016-02-15", "%Y-%m-%d")
#State may have been read as a factor; work with plain character strings
prosecution_data$State <- as.character(prosecution_data$State)
#see how many prosecution records there are for each state
table(prosecution_data$State)
Alabama Alaska Arizona Arkansas California
12 8 9 4 76
Colorado Connecticut Delaware Florida Georgia
32 47 3 138 29
Idaho Illinois Indiana Iowa Kansas
9 342 55 31 9
Kentucky Louisiana Maine Maryland Massachusetts
43 65 17 63 34
Michigan Minnesota Mississippi Missouri Montana
116 140 1 45 11
Nebraska Nevada New Hampshire New Jersey New Mexico
1 13 42 137 4
New York North Carolina North Dakota Ohio Oklahoma
110 124 53 404 41
Oregon Pennsylvania Rhode Island South Carolina South Dakota
19 726 2 12 13
Tennessee Texas Utah Vermont Virginia
94 44 21 13 63
Washington West Virginia Wisconsin Wyoming
78 33 381 19
#some cases appear multiple times (one record per victim); keep one record per
#accused person per charge date within each state
prosecution_data_unique <- prosecution_data %>%
  group_by(State) %>%
  distinct(Accused.Name, Date, .keep_all = TRUE)
table(prosecution_data_unique$State)
Alabama Alaska Arizona Arkansas California
12 8 9 4 72
Colorado Connecticut Delaware Florida Georgia
30 46 3 134 26
Idaho Illinois Indiana Iowa Kansas
9 336 53 31 9
Kentucky Louisiana Maine Maryland Massachusetts
43 65 17 62 34
Michigan Minnesota Mississippi Missouri Montana
114 140 1 44 10
Nebraska Nevada New Hampshire New Jersey New Mexico
1 13 42 131 4
New York North Carolina North Dakota Ohio Oklahoma
105 121 40 395 34
Oregon Pennsylvania Rhode Island South Carolina South Dakota
19 718 2 12 13
Tennessee Texas Utah Vermont Virginia
94 43 21 13 63
Washington West Virginia Wisconsin Wyoming
75 33 373 19
#change date charged into Date object
#NOTE(review): re-parsing Date.charged with mdy() recreates the typo'd
#2026-08-01 date that was corrected earlier in the script, so the correction
#is re-applied immediately after parsing
prosecution_data_unique$Date <- mdy(prosecution_data_unique$Date.charged)
prosecution_data_unique$Date[which(prosecution_data_unique$Date == as.Date("2026-08-01"))] <- as.Date("2016-02-15")
#group the data into six month periods (floor_date snaps each date to the
#start of its Jan-Jun / Jul-Dec half-year)
prosecution_data_unique <- prosecution_data_unique %>%
  mutate(six_month_pd = lubridate::floor_date(Date, "6 months"))
# #######ONLY IF GROUPS######
# prosecution_grouped <- prosecution_data_unique %>%
# #filter to dates after 2000 and dates before 2020
# filter(year(six_month_pd) >= 2000 & year(six_month_pd) <= 2019) %>%
# group_by(State, six_month_pd) %>%
# #for each state, for each six month period, count the number of DIH prosecutions
# summarise(num_dih = n()) %>%
# #label the groups according to zero, low, or high
# mutate(group = ifelse(num_dih == 0, "zero", ifelse(num_dih >= 5, "high", "low"))) %>%
# ungroup() %>%
# #have to add in a row for hawaii because its not in the prosecution dataset
# add_row(State = "Hawaii", six_month_pd = as.Date("2000-01-01"), num_dih = 0, group = "zero")
#
# #we compute the final group for each state by seeing if it ever hits high or low
# prosecution_grouped_final <- prosecution_grouped %>%
# group_by(State) %>%
# summarise(final_gp = ifelse(sum(group == "high") > 0, "high", ifelse(sum(group == "low")> 0, "low", "zero")))
#
# ggplot(prosecution_grouped_final, aes(final_gp)) +
# geom_bar() +
# labs(title = "Number of States by DIH prosecution Category, with Low = [1,5]") +
# geom_text(aes(label = ..count..), stat = "count", vjust = -.75)
#
# #number of DIH prosecutions per six month for each state
# # pdf("Figures/num_dih_per_six_month_pd_by_state_11_12_21.pdf")
# ggplot(prosecution_grouped, aes(x = six_month_pd, y = num_dih)) +
# geom_bar(stat = "identity") +
# facet_wrap(~State) +
# theme(axis.text.x = element_text(angle = 30, size = 5))
# # dev.off()
#
# # write.csv(prosecution_grouped, "./Data/num_dih_per_six_month_pd_by_state_11_12_21.csv")
#
#count the number of prosecution media alerts in each six month period
#we also get the first and last date of prosecution in each time period;
#restrict to the study window 2000-2019
#.groups = "drop" returns an ungrouped result (and silences the summarise
#regrouping message) so no grouping lingers into the merge below
prosecution_data_by_six_month_pd <- prosecution_data_unique %>%
  filter(year(six_month_pd) > 1999 & year(six_month_pd) < 2020) %>%
  group_by(State, six_month_pd) %>%
  summarise(first_date_in_pd = min(Date), last_date_in_pd = max(Date),
            .groups = "drop")
#create the data set used for this sensitivity analysis
#first, we merge the grouped prosecution data set with the main data set by
#state and time period; all = TRUE keeps state-periods with no prosecutions
#(their first/last_date_in_pd become NA, which the imputation loop relies on)
sensitivity_anlys_redefine_int_data <- merge(main_analysis_data,
                                             prosecution_data_by_six_month_pd,
                                             by.x = c("State", "Time_Period_Start"),
                                             by.y = c("State", "six_month_pd"), all = TRUE)
#initialize the 2-year-effect intervention variable to 0 for every row
#(plain column assignment; the original group_by(State) was a no-op for a
#constant and left the frame needlessly grouped)
sensitivity_anlys_redefine_int_data$int_2_yr_effect <- 0
#coerce the period boundaries to Date objects for the date arithmetic below
sensitivity_anlys_redefine_int_data$Time_Period_Start <- as.Date(sensitivity_anlys_redefine_int_data$Time_Period_Start)
sensitivity_anlys_redefine_int_data$Time_Period_End <- as.Date(sensitivity_anlys_redefine_int_data$Time_Period_End)
#we need to impute the newly defined intervention variable depending on the case
#by examining each row of the data set
#NOTE(review): the loop assumes each state's rows are ordered by
#Time_Period_Start, which merge() provides since it sorts on the "by"
#columns — confirm if the merge above ever changes
for(state in unique(sensitivity_anlys_redefine_int_data$State)){
#first, subset the data set into state_data which only contains the data for the state
state_index<-which(sensitivity_anlys_redefine_int_data$State == state)
state_data<-sensitivity_anlys_redefine_int_data[state_index,]
#note that the first four rows of the 2 year effect intervention variable are the same as the
#first four rows of the original intervention variable
#(the first four six-month periods span the first two years, where no earlier
#prosecution exists whose two-year effect could carry over)
state_data$int_2_yr_effect[1:4]<-state_data$Intervention_Redefined[1:4]
for(i in 5:nrow(state_data)){
#next, we deal with the rows where there was at least one prosecution in the last 3 six month periods
#These rows will be imputed with a 1
#(a prosecution within the previous 1.5 years keeps the whole current period
#inside its two-year effect window)
if((!is.na(state_data$first_date_in_pd[i - 1]) |
!is.na(state_data$first_date_in_pd[i - 2]) |
!is.na(state_data$first_date_in_pd[i - 3]))){
state_data$int_2_yr_effect[i]<-1
}else{
#next, we account for the rows with the fractions:
# 1) an intervention occurs in row i without an intervention 2 years ago
# 2) row i contains the lasting effects of an intervention that occurred 2 years ago
# 3) row i contains effects from both a new intervention starting in row i and lasting
# effects from 2 years ago
#To compute the fraction, we add the number of days that are affected by an intervention
#(from both the current prosecution and previous prosecution) and then divide by the total
#number of days in the period:
total_len_of_pd<-as.numeric(state_data$Time_Period_End[i] - state_data$Time_Period_Start[i])
#If there is no prosecution two years ago, i.e. in period i-4, then the last_date is the first
#date in period i. We subtract the last_date by the first date in the period, so we will get
#a 0 for the number of days that are affected by a prosecution from period i-4. Otherwise,
#the last_date is the last date of prosecution from period i-4, plus 2 years.
#note: ifelse() drops the difftime class, so len_of_past_effect is a plain
#numeric day count (which is what the division below needs)
len_of_past_effect <- ifelse(!is.na(state_data$first_date_in_pd[i - 4]),
(state_data$last_date_in_pd[i - 4] + years(2)) - state_data$Time_Period_Start[i],
0)
#If there is no prosecution in the period i, then the start_date is the last date in the period i.
#We subtract start_date from the last date in period i, so we will get a 0 for the number
#of days that are affected by a prosecution in period i. Otherwise, the start_date is the
#first date of a prosecution in period i.
len_of_current_effect <- ifelse(!is.na(state_data$first_date_in_pd[i]),
as.numeric(state_data$Time_Period_End[i] - state_data$first_date_in_pd[i]),
0)
state_data$int_2_yr_effect[i]<-(len_of_past_effect + len_of_current_effect)/total_len_of_pd
}
}
#for the case where the int_2_yr_effect is greater than 1 (could result when we add the effects of
#previous intervention and the current intervention), we just impute a 1 instead
state_data$int_2_yr_effect[state_data$int_2_yr_effect>1]<-1
#lastly, we store the int_2_yr_effect variable into the sensitivity analysis data set
#(written back by position via state_index, so row order must be unchanged)
sensitivity_anlys_redefine_int_data$int_2_yr_effect[state_index]<-state_data$int_2_yr_effect
}
#view the data set just to make sure the imputation looks right
# View(sensitivity_anlys_redefine_int_data %>% select(State, Time_Period_Start, Time_Period_End,
#                                                     Intervention_Redefined, first_date_in_pd,
#                                                     last_date_in_pd,
#                                                     int_2_yr_effect))
#for each six-month period, sum the (possibly fractional) intervention exposure
#across states to get the number of states under the intervention;
#ungroup() so no grouping lingers on the analysis data set
sensitivity_anlys_redefine_int_data <- sensitivity_anlys_redefine_int_data %>%
  group_by(Time_Period_Start) %>%
  mutate(num_states_w_intervention_2_yr_effect = sum(int_2_yr_effect)) %>%
  ungroup()
#run the analysis on the sensitivity analysis data
#binomial GAM: the outcome is the per-state-period (deaths, survivors) pair,
#with state fixed effects, a region-specific cubic-regression-spline time
#trend, the redefined policy covariates, the 2-year intervention exposure,
#and the number of states currently under the intervention
sensitivity_anlys_redefine_int_model<-gam(cbind(round(imputed_deaths), round(num_alive))~ State +
s(Time_Period_ID, bs = "cr", by = as.factor(Region)) +
Naloxone_Pharmacy_Yes_Redefined +
Naloxone_Pharmacy_No_Redefined +
Medical_Marijuana_Redefined +
Recreational_Marijuana_Redefined +
GSL_Redefined +
PDMP_Redefined +
Medicaid_Expansion_Redefined +
int_2_yr_effect +
num_states_w_intervention_2_yr_effect,
data = sensitivity_anlys_redefine_int_data, family = "binomial")
#render the fitted model's coefficient table as HTML
stargazer(sensitivity_anlys_redefine_int_model, type = "html", dep.var.labels = "Unintentional Overdose Death")
|
|
|
|
Dependent variable:
|
|
|
|
|
|
Unintentional Overdose Death
|
|
|
|
StateAlaska
|
0.264***
|
|
|
(0.028)
|
|
|
|
|
StateArizona
|
0.315***
|
|
|
(0.014)
|
|
|
|
|
StateArkansas
|
-0.369***
|
|
|
(0.020)
|
|
|
|
|
StateCalifornia
|
-0.157***
|
|
|
(0.013)
|
|
|
|
|
StateColorado
|
0.087***
|
|
|
(0.016)
|
|
|
|
|
StateConnecticut
|
0.179***
|
|
|
(0.016)
|
|
|
|
|
StateDelaware
|
0.420***
|
|
|
(0.022)
|
|
|
|
|
StateFlorida
|
0.248***
|
|
|
(0.012)
|
|
|
|
|
StateGeorgia
|
-0.056***
|
|
|
(0.013)
|
|
|
|
|
StateHawaii
|
-0.241***
|
|
|
(0.025)
|
|
|
|
|
StateIdaho
|
-0.142***
|
|
|
(0.024)
|
|
|
|
|
StateIllinois
|
-0.021
|
|
|
(0.013)
|
|
|
|
|
StateIndiana
|
0.082***
|
|
|
(0.014)
|
|
|
|
|
StateIowa
|
-0.740***
|
|
|
(0.021)
|
|
|
|
|
StateKansas
|
-0.310***
|
|
|
(0.019)
|
|
|
|
|
StateKentucky
|
0.634***
|
|
|
(0.014)
|
|
|
|
|
StateLouisiana
|
0.293***
|
|
|
(0.014)
|
|
|
|
|
StateMaine
|
0.142***
|
|
|
(0.022)
|
|
|
|
|
StateMaryland
|
-1.067***
|
|
|
(0.019)
|
|
|
|
|
StateMassachusetts
|
0.208***
|
|
|
(0.014)
|
|
|
|
|
StateMichigan
|
-0.028**
|
|
|
(0.014)
|
|
|
|
|
StateMinnesota
|
-0.628***
|
|
|
(0.017)
|
|
|
|
|
StateMississippi
|
-0.089***
|
|
|
(0.018)
|
|
|
|
|
StateMissouri
|
0.197***
|
|
|
(0.015)
|
|
|
|
|
StateMontana
|
-0.354***
|
|
|
(0.029)
|
|
|
|
|
StateNebraska
|
-0.863***
|
|
|
(0.029)
|
|
|
|
|
StateNevada
|
0.443***
|
|
|
(0.017)
|
|
|
|
|
StateNew Hampshire
|
0.250***
|
|
|
(0.020)
|
|
|
|
|
StateNew Jersey
|
0.110***
|
|
|
(0.013)
|
|
|
|
|
StateNew Mexico
|
0.641***
|
|
|
(0.017)
|
|
|
|
|
StateNew York
|
-0.244***
|
|
|
(0.013)
|
|
|
|
|
StateNorth Carolina
|
0.177***
|
|
|
(0.013)
|
|
|
|
|
StateNorth Dakota
|
-1.060***
|
|
|
(0.045)
|
|
|
|
|
StateOhio
|
0.450***
|
|
|
(0.012)
|
|
|
|
|
StateOklahoma
|
0.381***
|
|
|
(0.015)
|
|
|
|
|
StateOregon
|
-0.202***
|
|
|
(0.018)
|
|
|
|
|
StatePennsylvania
|
0.430***
|
|
|
(0.012)
|
|
|
|
|
StateRhode Island
|
0.245***
|
|
|
(0.022)
|
|
|
|
|
StateSouth Carolina
|
0.216***
|
|
|
(0.015)
|
|
|
|
|
StateSouth Dakota
|
-0.965***
|
|
|
(0.043)
|
|
|
|
|
StateTennessee
|
0.446***
|
|
|
(0.013)
|
|
|
|
|
StateTexas
|
-0.197***
|
|
|
(0.012)
|
|
|
|
|
StateUtah
|
0.081***
|
|
|
(0.018)
|
|
|
|
|
StateVermont
|
-0.162***
|
|
|
(0.031)
|
|
|
|
|
StateVirginia
|
-0.114***
|
|
|
(0.014)
|
|
|
|
|
StateWashington
|
0.064***
|
|
|
(0.015)
|
|
|
|
|
StateWest Virginia
|
0.868***
|
|
|
(0.015)
|
|
|
|
|
StateWisconsin
|
-0.040***
|
|
|
(0.015)
|
|
|
|
|
StateWyoming
|
0.026
|
|
|
(0.034)
|
|
|
|
|
Naloxone_Pharmacy_Yes_Redefined
|
-0.034***
|
|
|
(0.008)
|
|
|
|
|
Naloxone_Pharmacy_No_Redefined
|
0.003
|
|
|
(0.007)
|
|
|
|
|
Medical_Marijuana_Redefined
|
0.067***
|
|
|
(0.006)
|
|
|
|
|
Recreational_Marijuana_Redefined
|
-0.039***
|
|
|
(0.009)
|
|
|
|
|
GSL_Redefined
|
0.036***
|
|
|
(0.006)
|
|
|
|
|
PDMP_Redefined
|
-0.017***
|
|
|
(0.006)
|
|
|
|
|
Medicaid_Expansion_Redefined
|
0.103***
|
|
|
(0.006)
|
|
|
|
|
int_2_yr_effect
|
0.058***
|
|
|
(0.005)
|
|
|
|
|
num_states_w_intervention_2_yr_effect
|
-0.002**
|
|
|
(0.001)
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.9
|
|
|
|
|
|
|
|
|
Constant
|
-9.738***
|
|
|
(0.028)
|
|
|
|
|
|
|
Observations
|
2,000
|
|
Adjusted R2
|
0.911
|
|
Log Likelihood
|
-16,745.360
|
|
UBRE
|
8.835
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
Compile Results
############## Sensitivity Analysis 4: Make Data Frame of Results and 95% CI ##########
#store the coefficients into the table
#(data.frame() auto-names the single column after the expression; it is
#renamed to "Coefficient_Estimate" just below)
sensitivity_anlys_redefine_int_full_table<-data.frame(coef(sensitivity_anlys_redefine_int_model))
#check to see how the table looks
head(sensitivity_anlys_redefine_int_full_table)
## coef.sensitivity_anlys_redefine_int_model.
## (Intercept) -9.73777125
## StateAlaska 0.26352781
## StateArizona 0.31539440
## StateArkansas -0.36941493
## StateCalifornia -0.15738403
## StateColorado 0.08700546
#give the single column a readable name
colnames(sensitivity_anlys_redefine_int_full_table) <- "Coefficient_Estimate"
#names of the policy/intervention covariates exactly as they appear in the
#fitted model: the seven "_Redefined" policies plus the two intervention terms
covariates <- c(paste0(c("Naloxone_Pharmacy_Yes",
                         "Naloxone_Pharmacy_No",
                         "Medical_Marijuana",
                         "Recreational_Marijuana",
                         "GSL",
                         "PDMP",
                         "Medicaid_Expansion"),
                       "_Redefined"),
                "int_2_yr_effect",
                "num_states_w_intervention_2_yr_effect")
#rename the variable names of the regression output so that they look nicer.
#Besides the covariates there are 3 kinds of coefficient names: state fixed
#effects ("StateOhio"), the intercept, and smoothed region-time terms
#("s(Time_Period_ID):as.factor(Region)West.3").
#Vectorized over the whole rowname vector instead of an index loop that
#re-reads rownames() on every iteration.
rn <- rownames(sensitivity_anlys_redefine_int_full_table)
editable <- !(rn %in% covariates)
#state effects: names look like "StateMassachusetts"; drop the "State" prefix
is_state <- editable & startsWith(rn, "State")
rn[is_state] <- substring(rn[is_state], 6)
#the intercept absorbs the baseline state, Alabama — make that explicit
rn[editable & rn == "(Intercept)"] <- "Intercept/Alabama"
#smoothed time effects: replace the s()-by-factor prefix with readable text,
#e.g. "s(Time_Period_ID):as.factor(Region)West.3" -> "Smoothed Time for Region West.3"
smooth_prefix <- "s(Time_Period_ID):as.factor(Region)"
is_smooth <- editable & startsWith(rn, smooth_prefix)
rn[is_smooth] <- paste0("Smoothed Time for Region ",
                        substring(rn[is_smooth], nchar(smooth_prefix) + 1))
rownames(sensitivity_anlys_redefine_int_full_table) <- rn
#confidence intervals for the coefficients (Wald: estimate +/- 1.96 * SE)
#summary.gam() is expensive to compute, so call it once and reuse the result
#instead of recomputing it for every column
model_summary <- summary(sensitivity_anlys_redefine_int_model)
sensitivity_anlys_redefine_int_full_table$Coefficient_Lower_Bound <- sensitivity_anlys_redefine_int_full_table$Coefficient_Estimate - 1.96*model_summary$se
sensitivity_anlys_redefine_int_full_table$Coefficient_Upper_Bound <- sensitivity_anlys_redefine_int_full_table$Coefficient_Estimate + 1.96*model_summary$se
#exponentiate the estimates and CI bounds to the odds ratio scale
sensitivity_anlys_redefine_int_full_table$Odds_Ratio<-exp(sensitivity_anlys_redefine_int_full_table$Coefficient_Estimate)
sensitivity_anlys_redefine_int_full_table$Odds_Ratio_LB<-exp(sensitivity_anlys_redefine_int_full_table$Coefficient_Lower_Bound)
sensitivity_anlys_redefine_int_full_table$Odds_Ratio_UB<-exp(sensitivity_anlys_redefine_int_full_table$Coefficient_Upper_Bound)
#store the standard error and p-value
sensitivity_anlys_redefine_int_full_table$Standard_Error <- model_summary$se
#note that there is no p-value for the smoothed time effects, so we pad those
#trailing rows with NA (p.pv covers only the parametric terms, in order)
sensitivity_anlys_redefine_int_full_table$p_value <- c(model_summary$p.pv,
                                                       rep(NA, length(coef(sensitivity_anlys_redefine_int_model)) -
                                                             length(model_summary$p.pv)))
head(sensitivity_anlys_redefine_int_full_table)
## Coefficient_Estimate Coefficient_Lower_Bound
## Intercept/Alabama -9.73777125 -9.79199494
## Alaska 0.26352781 0.20958992
## Arizona 0.31539440 0.28809260
## Arkansas -0.36941493 -0.40821397
## California -0.15738403 -0.18334805
## Colorado 0.08700546 0.05563784
## Coefficient_Upper_Bound Odds_Ratio Odds_Ratio_LB
## Intercept/Alabama -9.6835476 5.901191e-05 5.589727e-05
## Alaska 0.3174657 1.301513e+00 1.233172e+00
## Arizona 0.3426962 1.370800e+00 1.333881e+00
## Arkansas -0.3306159 6.911386e-01 6.648366e-01
## California -0.1314200 8.543759e-01 8.324784e-01
## Colorado 0.1183731 1.090903e+00 1.057215e+00
## Odds_Ratio_UB Standard_Error p_value
## Intercept/Alabama 0.0000623001 0.02766515 0.000000e+00
## Alaska 1.3736421285 0.02751933 1.007809e-21
## Arizona 1.4087407223 0.01392949 1.664931e-113
## Arkansas 0.7184810905 0.01979543 1.015868e-77
## California 0.8768494050 0.01324695 1.489753e-32
## Colorado 1.1256640001 0.01600389 5.433111e-08
#inspect the bottom rows (smoothed time effects, which have NA p-values)
tail(sensitivity_anlys_redefine_int_full_table)
## Coefficient_Estimate Coefficient_Lower_Bound
## Smoothed Time for Region West.4 0.2581725 0.2371533
## Smoothed Time for Region West.5 0.2624918 0.2309442
## Smoothed Time for Region West.6 0.2674418 0.2293683
## Smoothed Time for Region West.7 0.3501250 0.3017143
## Smoothed Time for Region West.8 0.4826199 0.4246320
## Smoothed Time for Region West.9 0.5736609 0.5182325
## Coefficient_Upper_Bound Odds_Ratio
## Smoothed Time for Region West.4 0.2791917 1.294562
## Smoothed Time for Region West.5 0.2940395 1.300166
## Smoothed Time for Region West.6 0.3055153 1.306618
## Smoothed Time for Region West.7 0.3985358 1.419245
## Smoothed Time for Region West.8 0.5406077 1.620314
## Smoothed Time for Region West.9 0.6290893 1.774752
## Odds_Ratio_LB Odds_Ratio_UB Standard_Error
## Smoothed Time for Region West.4 1.267635 1.322061 0.01072407
## Smoothed Time for Region West.5 1.259789 1.341837 0.01609575
## Smoothed Time for Region West.6 1.257805 1.357324 0.01942526
## Smoothed Time for Region West.7 1.352175 1.489642 0.02469936
## Smoothed Time for Region West.8 1.529028 1.717050 0.02958563
## Smoothed Time for Region West.9 1.679057 1.875901 0.02827980
## p_value
## Smoothed Time for Region West.4 NA
## Smoothed Time for Region West.5 NA
## Smoothed Time for Region West.6 NA
## Smoothed Time for Region West.7 NA
## Smoothed Time for Region West.8 NA
## Smoothed Time for Region West.9 NA
#export a table with just the covariates
#first, find the rows that contain the covariates
covariate_Index<-which(rownames(sensitivity_anlys_redefine_int_full_table) %in% covariates)
#round the covariate rows to 5 decimals for presentation
sensitivity_anlys_redefine_int_covariate_table<-(round(sensitivity_anlys_redefine_int_full_table[covariate_Index,], 5))
#rename the covariate rows so the labels look cleaner; the replacement vector
#is hard-coded to the nine covariates, so fail loudly if any went missing
stopifnot(length(covariate_Index) == 9L)
rownames(sensitivity_anlys_redefine_int_covariate_table)<-c("Naloxone_Pharmacy_Yes",
                                                            "Naloxone_Pharmacy_No",
                                                            "Medical_Marijuana",
                                                            "Recreational_Marijuana",
                                                            "GSL",
                                                            "PDMP",
                                                            "Medicaid_Expansion",
                                                            "Two Year Intervention Effect",
                                                            "Number of States w Intervention")
#now, reorganize the data so that the covariates are on top and the rest of
#the variables are below
sensitivity_anlys_redefine_int_covariate_table<-rbind(sensitivity_anlys_redefine_int_covariate_table, sensitivity_anlys_redefine_int_full_table[-covariate_Index,])
#keep only the odds-ratio-scale columns and the p-value; setdiff() preserves
#column order and is safe even when a listed column is absent, whereas
#[ , -which(...)] would silently drop every column if which() matched nothing
keep_cols <- setdiff(colnames(sensitivity_anlys_redefine_int_covariate_table),
                     c("Coefficient_Estimate", "Coefficient_Lower_Bound", "Coefficient_Upper_Bound", "Standard_Error"))
sensitivity_anlys_redefine_int_covariate_table <- sensitivity_anlys_redefine_int_covariate_table[, keep_cols]
colnames(sensitivity_anlys_redefine_int_covariate_table)<-c("Risk_Ratio_Estimates", "RR_95_CI_LB", "RR_95_CI_UB", "p-value")
head(sensitivity_anlys_redefine_int_covariate_table, 10)
## Risk_Ratio_Estimates RR_95_CI_LB RR_95_CI_UB
## Naloxone_Pharmacy_Yes 9.664500e-01 9.517900e-01 9.81330e-01
## Naloxone_Pharmacy_No 1.002860e+00 9.901100e-01 1.01578e+00
## Medical_Marijuana 1.069810e+00 1.058000e+00 1.08175e+00
## Recreational_Marijuana 9.615700e-01 9.454300e-01 9.77970e-01
## GSL 1.036740e+00 1.025360e+00 1.04826e+00
## PDMP 9.833200e-01 9.721900e-01 9.94570e-01
## Medicaid_Expansion 1.108180e+00 1.095140e+00 1.12138e+00
## Two Year Intervention Effect 1.059240e+00 1.049520e+00 1.06905e+00
## Number of States w Intervention 9.975500e-01 9.955300e-01 9.99570e-01
## Intercept/Alabama 5.901191e-05 5.589727e-05 6.23001e-05
## p-value
## Naloxone_Pharmacy_Yes 0.00001
## Naloxone_Pharmacy_No 0.66138
## Medical_Marijuana 0.00000
## Recreational_Marijuana 0.00001
## GSL 0.00000
## PDMP 0.00375
## Medicaid_Expansion 0.00000
## Two Year Intervention Effect 0.00000
## Number of States w Intervention 0.01738
## Intercept/Alabama 0.00000
#save the table into a CSV
# write.csv(round(sensitivity_anlys_redefine_int_covariate_table, 3), "./Data/coefficients_covariates_9_6_21_full_data_redefine_int.csv")
Attributable Deaths
################ Sensitivity Analysis 4: Number of Attributable Deaths ################
#first, we subset the data so that we only focus on the time points for which at least one state had the intervention
attr_deaths_anlys_redefine_int<-sensitivity_anlys_redefine_int_data[which(sensitivity_anlys_redefine_int_data$int_2_yr_effect>0),]
#compute the probability of overdose had intervention not occurred:
#remove the intervention's contribution on the logit scale, then map back
#NOTE(review): expit()/logit() come from an attached package (e.g. gtools or
#similar) — not visible in this chunk; confirm which one is loaded
prob_od_no_int_redefine_int<-expit(-coef(sensitivity_anlys_redefine_int_model)["int_2_yr_effect"]*attr_deaths_anlys_redefine_int$int_2_yr_effect
+ logit(attr_deaths_anlys_redefine_int$imputed_deaths/attr_deaths_anlys_redefine_int$population))
#compute the lower and upper bounds of 95% CI of probability of overdose had intervention not occurred
#here, we compute the lower and upper bounds of the 95% CI of all the coefficients using the standard error from the model
coef_lb<-coef(sensitivity_anlys_redefine_int_model) - 1.96*summary(sensitivity_anlys_redefine_int_model)$se
coef_ub<-coef(sensitivity_anlys_redefine_int_model) + 1.96*summary(sensitivity_anlys_redefine_int_model)$se
#we then calculate the upper and lower bounds of the probability of overdose death had intervention not occurred by using
#the lower and upper bounds of the coefficient of the intervention variable
#NOTE(review): because the coefficient enters with a negative sign, the "LB"
#probability uses the coefficient's lower bound and is actually the HIGHER
#counterfactual probability — the labels track the coefficient bound, not the
#probability bound; confirm this is the intended convention downstream
prob_od_no_int_LB_redefine_int<-expit(-coef_lb[names(coef_lb) == "int_2_yr_effect"]*attr_deaths_anlys_redefine_int$int_2_yr_effect
+ logit(attr_deaths_anlys_redefine_int$imputed_deaths/attr_deaths_anlys_redefine_int$population))
prob_od_no_int_UB_redefine_int<-expit(-coef_ub[names(coef_ub) == "int_2_yr_effect"]*attr_deaths_anlys_redefine_int$int_2_yr_effect
+ logit(attr_deaths_anlys_redefine_int$imputed_deaths/attr_deaths_anlys_redefine_int$population))
#estimate the number of deaths attributable to the intervention
#first, preallocate the result vectors, sized by the number of distinct time
#periods actually present in the SUBSET (attr_deaths_anlys_redefine_int) —
#sizing by the full data set's periods (as originally written) leaves trailing
#NAs whenever some periods have no intervention exposure, which would make the
#data.frame assembly and the total sum below fail or return NA
attr_time_ids <- sort(unique(attr_deaths_anlys_redefine_int$Time_Period_ID))
num_attr_od_UB <- num_attr_od_LB <- num_attr_od <- rep(NA_real_, length(attr_time_ids))
#for each time period, find the rows from that time point and compute the
#total number of deaths attributable to the intervention
index<-1 #keep track of where to store the values in the vectors
for(time in attr_time_ids){
#find the indices of rows where the time point = time
time_point_index<-which(attr_deaths_anlys_redefine_int$Time_Period_ID == time)
#attributable deaths = observed deaths with intervention
#                      - estimated deaths had the intervention not occurred
num_attr_od[index]<-sum(attr_deaths_anlys_redefine_int$imputed_deaths[time_point_index]
- prob_od_no_int_redefine_int[time_point_index]*attr_deaths_anlys_redefine_int$population[time_point_index])
#the corresponding bounds, built from the intervention coefficient's 95% CI
num_attr_od_LB[index]<-sum(attr_deaths_anlys_redefine_int$imputed_deaths[time_point_index]
- prob_od_no_int_LB_redefine_int[time_point_index]*attr_deaths_anlys_redefine_int$population[time_point_index])
num_attr_od_UB[index]<-sum(attr_deaths_anlys_redefine_int$imputed_deaths[time_point_index]
- prob_od_no_int_UB_redefine_int[time_point_index]*attr_deaths_anlys_redefine_int$population[time_point_index])
index<-index + 1
}
#assemble the per-period attributable-death estimates into one data frame
#NOTE(review): pairing sort(unique(Time_Period_ID)) with
#sort(unique(Time_Period_Start)) assumes period IDs increase monotonically
#with start dates — confirm against how Time_Period_ID is constructed
num_attr_od_redefine_int<-data.frame("Time_Period_ID" = sort(unique(attr_deaths_anlys_redefine_int$Time_Period_ID)),
"Time_Start" = sort(unique(attr_deaths_anlys_redefine_int$Time_Period_Start)),
"Num_Attr_Deaths" = num_attr_od,
"Num_Attr_Deaths_LB" = num_attr_od_LB,
"Num_Attr_Deaths_UB" = num_attr_od_UB)
#sum up the total number of excess deaths attributable to the intervention
#NOTE(review): num_attr_od was preallocated by the FULL data set's period
#count; if the intervention subset has fewer periods, trailing NAs remain and
#this sum returns NA — verify the two period counts match
sum(num_attr_od_redefine_int$Num_Attr_Deaths)
## [1] 28144.81
#aggregate the excess-death estimates (and their CI bounds) to calendar years
yearly_num_Attr_Deaths_redefine_int <- num_attr_od_redefine_int %>%
  mutate(year = year(Time_Start)) %>%
  group_by(year) %>%
  summarise(deaths = sum(Num_Attr_Deaths),
            death_lb = sum(Num_Attr_Deaths_LB),
            death_ub = sum(Num_Attr_Deaths_UB))
#quick distributional look at the yearly estimates
summary(yearly_num_Attr_Deaths_redefine_int$deaths)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 27.8 494.0 1184.5 1407.2 1990.3 3358.7
#plot the yearly attributable-death estimates over time
ggplot(yearly_num_Attr_Deaths_redefine_int, aes(x = year, y = deaths)) + geom_line() + geom_point()

Sensitivity Analysis 5: Effect of Lagged DIH Prosecutions on Unintentional Overdose Deaths, Assuming Effect Lasts for Two Years
Analysis
############# Sensitivity Analysis 5: Two Year Effect Lagged by 6 months ######################
#create a plot for each state to see how many prosecution media alerts there are per 6 month period
#read in the prosecution media alert data
prosecution_data <- read.csv("./Data/dih_prosecutions_9_6_21.csv")
#data cleaning: parse the charge date, normalize misspelled/padded state
#names, and drop rows with no usable date or state
#(the original duplicated "#data cleaning" comment is removed)
prosecution_data <- prosecution_data %>%
  mutate(Date = as.Date(Date.charged, "%m/%d/%Y")) %>%
  mutate(State = ifelse(State.Filed == "pennsylvania", "Pennsylvania", State.Filed),
         State = ifelse(State.Filed == "Virginia ", "Virginia", State)) %>%
  #%in% replaces the three separate inequality tests against "No Info" variants
  filter(!is.na(Date),
         !State.Filed %in% c("No Info", "No info", "No Info "),
         State != "")
#clean up the data by looking at the link to the article: one record has a
#typo'd charge date of 2026-08-01; the source article gives 2016-02-15
#use which() so the assignment is NA-safe
prosecution_data$Date[which(prosecution_data$Date == as.Date("2026-08-01"))] <- as.Date("2016-02-15", "%Y-%m-%d")
#change the states into character instead of factor
prosecution_data$State <- as.character(prosecution_data$State)
#see how many prosecution data points there are for each state
table(prosecution_data$State)
Alabama Alaska Arizona Arkansas California
12 8 9 4 76
Colorado Connecticut Delaware Florida Georgia
32 47 3 138 29
Idaho Illinois Indiana Iowa Kansas
9 342 55 31 9
Kentucky Louisiana Maine Maryland Massachusetts
43 65 17 63 34
Michigan Minnesota Mississippi Missouri Montana
116 140 1 45 11
Nebraska Nevada New Hampshire New Jersey New Mexico
1 13 42 137 4
New York North Carolina North Dakota Ohio Oklahoma
110 124 53 404 41
Oregon Pennsylvania Rhode Island South Carolina South Dakota
19 726 2 12 13
Tennessee Texas Utah Vermont Virginia
94 44 21 13 63
Washington West Virginia Wisconsin Wyoming
78 33 381 19
#change date charged into Date object
#NOTE(review): re-parsing Date.charged with mdy() recreates the typo'd
#2026-08-01 date corrected just above, so the correction is re-applied
#(also note this sensitivity analysis uses prosecution_data without the
#per-victim de-duplication applied in sensitivity analysis 4 — confirm intended)
prosecution_data$Date <- mdy(prosecution_data$Date.charged)
prosecution_data$Date[which(prosecution_data$Date == as.Date("2026-08-01"))] <- as.Date("2016-02-15")
#group the data into six month periods
prosecution_data <- prosecution_data %>% mutate(six_month_pd = lubridate::floor_date(Date, "6 months"))
#count the number of prosecution media alerts in each six month period
#we also get the first and last date of prosecution in the period;
#.groups = "drop" leaves the summary ungrouped for the merge below
prosecution_data_by_six_month_pd <- prosecution_data %>%
  filter(year(six_month_pd) > 1999 & year(six_month_pd) < 2020) %>%
  group_by(State, six_month_pd) %>%
  summarise(first_date_in_pd = min(Date), last_date_in_pd = max(Date),
            .groups = "drop")
#create the data set used for this sensitivity analysis
#first, we merge the grouped prosecution data set with the main data set by
#state and time period; all = TRUE keeps state-periods with no prosecutions
sensitivity_anlys_2yr_int_lag <- merge(main_analysis_data,
                                       prosecution_data_by_six_month_pd,
                                       by.x = c("State", "Time_Period_Start"),
                                       by.y = c("State", "six_month_pd"), all = TRUE)
#initialize the lagged 2-year-effect intervention variable to 0 for every row
#(plain column assignment; the original group_by(State) was a no-op for a
#constant and left the frame needlessly grouped)
sensitivity_anlys_2yr_int_lag$int_2_yr_effect_lag <- 0
#coerce the period boundaries to Date objects for the date arithmetic below
sensitivity_anlys_2yr_int_lag$Time_Period_Start <- as.Date(sensitivity_anlys_2yr_int_lag$Time_Period_Start)
sensitivity_anlys_2yr_int_lag$Time_Period_End <- as.Date(sensitivity_anlys_2yr_int_lag$Time_Period_End)
#we need to calculate the 2 year intervention variable depending on the case
#by examining each row of the data set
#NOTE(review): assumes each state's rows are ordered by Time_Period_Start,
#which merge() provides since it sorts on the "by" columns
for(state in unique(sensitivity_anlys_2yr_int_lag$State)){
#first, subset the data set into state_data which only contains the data for the state
state_index<-which(sensitivity_anlys_2yr_int_lag$State == state)
state_data<-sensitivity_anlys_2yr_int_lag[state_index,]
#the effect is lagged by six months, so period i writes into slot i + 1;
#looping to nrow - 1 keeps i + 1 in bounds
for(i in 1:(nrow(state_data)-1)){
#for the states that had at least one prosecution in the first 2 years of analysis period,
#we impute the intervention variable based on the intervention variable of main analysis:
#if intervention occurred in time i, then for the 6-month lagged effect, we compute the
#proportion of days affected by intervention, taking into account the 6 month lag. Else,
#if the intervention had occurred by time i, we impute a 1 in the next six-month interval
#since lagged
if(i %in% c(1:4)){
if(state_data$Intervention_Redefined[i] > 0 & state_data$Intervention_Redefined[i] < 1){
state_data$int_2_yr_effect_lag[i + 1] <- as.numeric(state_data$Time_Period_End[i + 1] - (state_data$first_date_in_pd[i] %m+% months(6)))/as.numeric(state_data$Time_Period_End[i + 1] - state_data$Time_Period_Start[i + 1])
}else if(state_data$Intervention_Redefined[i] == 1){
state_data$int_2_yr_effect_lag[i + 1] <- 1
}
#next, if there was at least one prosecution in the last 3 six-month periods (i.e. 1.5 years) before time i
#These rows will be imputed with a 1 in the next six-month interval since lagged
}else if((!is.na(state_data$first_date_in_pd[i - 1]) |
!is.na(state_data$first_date_in_pd[i - 2]) |
!is.na(state_data$first_date_in_pd[i - 3]))){
state_data$int_2_yr_effect_lag[i + 1]<-1
}else{
#next, we account for the rows with the fractions:
# 1) an intervention occurs in row i without an intervention 2 years ago
# 2) row i contains the lasting effects of an intervention that occurred 2 years ago
# 3) row i contains effects from both a new intervention starting in row i and lasting
# effects from 2 years ago
#To compute the fraction, we add the number of days that are affected by an intervention
#(from both the current prosecution and previous prosecution) and then divide by the total
#number of days in the period:
#first, we compute the number of days in the period of time interval i + 1
total_len_of_pd<-as.numeric(state_data$Time_Period_End[i + 1] - state_data$Time_Period_Start[i + 1])
#If there is no prosecution two years ago, i.e. in period i-4, then the last_date is the first
#date in period i + 1. We subtract the last_date by the first date in period i + 1, so we will get
#a 0 for the number of days that are affected by a prosecution from period i-4. Otherwise,
#the last_date is the last date of prosecution from period i-4, plus 2 years.
#The start time is the first date of period i + 1
#note: ifelse() strips the difftime class, yielding a plain numeric day count
len_of_past_effect <- ifelse(!is.na(state_data$first_date_in_pd[i - 4]),
as.numeric((as.Date(state_data$last_date_in_pd[i - 4] + years(2), format = "%Y-%m-%d") %m+% months(6)) - state_data$Time_Period_Start[i + 1]),
0)
#If there is no prosecution in the period i, then the start_date is the last date in period i + 1 (because lagged effect).
#We subtract start_date from the last date in period i + 1, so we will get a 0 for the number
#of days that are affected by a prosecution in period i. Otherwise, the start_date is the
#first date of a prosecution in period i + 6 months. The end date is the last date in period i + 1.
len_of_current_effect <- ifelse(!is.na(state_data$first_date_in_pd[i]),
as.numeric(state_data$Time_Period_End[i + 1] - (state_data$first_date_in_pd[i] %m+% months(6))),
0)
state_data$int_2_yr_effect_lag[i + 1]<-(len_of_past_effect + len_of_current_effect)/total_len_of_pd
}
}
#for the case where the int_2_yr_effect is greater than 1 (could result when we add the effects of
#previous intervention and the current intervention), we just impute a 1 instead
state_data$int_2_yr_effect_lag[state_data$int_2_yr_effect_lag>1]<-1
#lastly, we store the int_2_yr_effect variable into the sensitivity analysis data set
#(written back by position via state_index, so row order must be unchanged)
sensitivity_anlys_2yr_int_lag$int_2_yr_effect_lag[state_index]<-state_data$int_2_yr_effect_lag
}
#for each time period, count the (fractional) number of states with an active
#lagged intervention by summing the exposure variable across states.
#ungroup() afterwards so the Time_Period_Start grouping attribute does not
#silently leak into downstream operations (the original left the data grouped).
sensitivity_anlys_2yr_int_lag <- sensitivity_anlys_2yr_int_lag %>%
group_by(Time_Period_Start) %>%
mutate(num_states_w_intervention_2yr_lag = sum(int_2_yr_effect_lag)) %>%
ungroup()
#view the data set just to make sure the imputation looks right
# View(sensitivity_anlys_2yr_int_lag %>% select(State, Time_Period_Start, Time_Period_End,
# Intervention_Redefined, first_date_in_pd,
# last_date_in_pd,
# int_2_yr_effect_lag))
#run the analysis for all the states
#binomial GAM on (deaths, survivors) counts per state-period. The linear
#predictor contains: state fixed effects, a cubic-regression-spline smooth of
#time fit separately for each Census region, the co-occurring policy indicator
#variables, the lagged 2-year intervention exposure (the coefficient of
#interest), and the number of states with the lagged intervention (to adjust
#for national-level spillover).
lagged_analysis_model<-gam(cbind(round(imputed_deaths), round(num_alive))~ State +
s(Time_Period_ID, bs = "cr", by = as.factor(Region)) +
Naloxone_Pharmacy_Yes_Redefined +
Naloxone_Pharmacy_No_Redefined +
Medical_Marijuana_Redefined +
Recreational_Marijuana_Redefined +
GSL_Redefined +
PDMP_Redefined +
Medicaid_Expansion_Redefined +
int_2_yr_effect_lag +
num_states_w_intervention_2yr_lag,
data = sensitivity_anlys_2yr_int_lag, family = "binomial")
#summary output of the model
#render the coefficient table as HTML for the knitted document
stargazer(lagged_analysis_model, type = "html", dep.var.labels = "Unintentional Overdose Death")
|
|
|
|
Dependent variable:
|
|
|
|
|
|
Unintentional Overdose Death
|
|
|
|
StateAlaska
|
0.259***
|
|
|
(0.028)
|
|
|
|
|
StateArizona
|
0.310***
|
|
|
(0.014)
|
|
|
|
|
StateArkansas
|
-0.382***
|
|
|
(0.020)
|
|
|
|
|
StateCalifornia
|
-0.153***
|
|
|
(0.013)
|
|
|
|
|
StateColorado
|
0.087***
|
|
|
(0.016)
|
|
|
|
|
StateConnecticut
|
0.183***
|
|
|
(0.016)
|
|
|
|
|
StateDelaware
|
0.414***
|
|
|
(0.022)
|
|
|
|
|
StateFlorida
|
0.256***
|
|
|
(0.012)
|
|
|
|
|
StateGeorgia
|
-0.056***
|
|
|
(0.013)
|
|
|
|
|
StateHawaii
|
-0.253***
|
|
|
(0.025)
|
|
|
|
|
StateIdaho
|
-0.149***
|
|
|
(0.024)
|
|
|
|
|
StateIllinois
|
-0.015
|
|
|
(0.013)
|
|
|
|
|
StateIndiana
|
0.083***
|
|
|
(0.014)
|
|
|
|
|
StateIowa
|
-0.737***
|
|
|
(0.021)
|
|
|
|
|
StateKansas
|
-0.315***
|
|
|
(0.019)
|
|
|
|
|
StateKentucky
|
0.633***
|
|
|
(0.014)
|
|
|
|
|
StateLouisiana
|
0.297***
|
|
|
(0.014)
|
|
|
|
|
StateMaine
|
0.143***
|
|
|
(0.022)
|
|
|
|
|
StateMaryland
|
-1.062***
|
|
|
(0.019)
|
|
|
|
|
StateMassachusetts
|
0.209***
|
|
|
(0.014)
|
|
|
|
|
StateMichigan
|
-0.022
|
|
|
(0.014)
|
|
|
|
|
StateMinnesota
|
-0.623***
|
|
|
(0.017)
|
|
|
|
|
StateMississippi
|
-0.102***
|
|
|
(0.018)
|
|
|
|
|
StateMissouri
|
0.199***
|
|
|
(0.015)
|
|
|
|
|
StateMontana
|
-0.352***
|
|
|
(0.029)
|
|
|
|
|
StateNebraska
|
-0.875***
|
|
|
(0.029)
|
|
|
|
|
StateNevada
|
0.444***
|
|
|
(0.017)
|
|
|
|
|
StateNew Hampshire
|
0.251***
|
|
|
(0.020)
|
|
|
|
|
StateNew Jersey
|
0.112***
|
|
|
(0.013)
|
|
|
|
|
StateNew Mexico
|
0.634***
|
|
|
(0.017)
|
|
|
|
|
StateNew York
|
-0.242***
|
|
|
(0.013)
|
|
|
|
|
StateNorth Carolina
|
0.180***
|
|
|
(0.013)
|
|
|
|
|
StateNorth Dakota
|
-1.061***
|
|
|
(0.045)
|
|
|
|
|
StateOhio
|
0.457***
|
|
|
(0.012)
|
|
|
|
|
StateOklahoma
|
0.379***
|
|
|
(0.015)
|
|
|
|
|
StateOregon
|
-0.202***
|
|
|
(0.018)
|
|
|
|
|
StatePennsylvania
|
0.436***
|
|
|
(0.012)
|
|
|
|
|
StateRhode Island
|
0.237***
|
|
|
(0.022)
|
|
|
|
|
StateSouth Carolina
|
0.209***
|
|
|
(0.015)
|
|
|
|
|
StateSouth Dakota
|
-0.973***
|
|
|
(0.043)
|
|
|
|
|
StateTennessee
|
0.442***
|
|
|
(0.013)
|
|
|
|
|
StateTexas
|
-0.195***
|
|
|
(0.012)
|
|
|
|
|
StateUtah
|
0.080***
|
|
|
(0.018)
|
|
|
|
|
StateVermont
|
-0.164***
|
|
|
(0.031)
|
|
|
|
|
StateVirginia
|
-0.109***
|
|
|
(0.014)
|
|
|
|
|
StateWashington
|
0.069***
|
|
|
(0.015)
|
|
|
|
|
StateWest Virginia
|
0.866***
|
|
|
(0.015)
|
|
|
|
|
StateWisconsin
|
-0.035**
|
|
|
(0.015)
|
|
|
|
|
StateWyoming
|
0.022
|
|
|
(0.034)
|
|
|
|
|
Naloxone_Pharmacy_Yes_Redefined
|
-0.027***
|
|
|
(0.008)
|
|
|
|
|
Naloxone_Pharmacy_No_Redefined
|
0.005
|
|
|
(0.007)
|
|
|
|
|
Medical_Marijuana_Redefined
|
0.066***
|
|
|
(0.006)
|
|
|
|
|
Recreational_Marijuana_Redefined
|
-0.039***
|
|
|
(0.009)
|
|
|
|
|
GSL_Redefined
|
0.034***
|
|
|
(0.006)
|
|
|
|
|
PDMP_Redefined
|
-0.018***
|
|
|
(0.006)
|
|
|
|
|
Medicaid_Expansion_Redefined
|
0.102***
|
|
|
(0.006)
|
|
|
|
|
int_2_yr_effect_lag
|
0.036***
|
|
|
(0.005)
|
|
|
|
|
num_states_w_intervention_2yr_lag
|
0.0001
|
|
|
(0.001)
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.9
|
|
|
|
|
|
|
|
|
Constant
|
-9.787***
|
|
|
(0.030)
|
|
|
|
|
|
|
Observations
|
2,000
|
|
Adjusted R2
|
0.911
|
|
Log Likelihood
|
-16,792.500
|
|
UBRE
|
8.882
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
Plots
#diagnostics for the fitted GAM: optimizer convergence and basis-dimension (k)
#adequacy checks for each regional smooth
gam.check(lagged_analysis_model)

##
## Method: UBRE Optimizer: outer newton
## full convergence after 7 iterations.
## Gradient range [-7.10661e-07,2.358126e-05]
## (score 8.881691 & scale 1).
## Hessian positive definite, eigenvalue range [8.779527e-05,0.0005068349].
## Model rank = 95 / 95
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(Time_Period_ID):as.factor(Region)Midwest 9.00 8.88 1.05 1.00
## s(Time_Period_ID):as.factor(Region)Northeast 9.00 8.95 1.05 0.99
## s(Time_Period_ID):as.factor(Region)South 9.00 8.92 1.05 0.99
## s(Time_Period_ID):as.factor(Region)West 9.00 7.88 1.05 0.99
Compile Results
############## Sensitivity Analysis 5: Make Data Frame of Results and 95% CI ###########
#store the coefficients into the table
#coef() returns the full coefficient vector (parametric terms plus smooth-basis
#coefficients); wrap it in a data frame, one row per coefficient
sensitivity_anlys_2yr_int_lag_full_table<-data.frame(coef(lagged_analysis_model))
#check to see how the table looks
head(sensitivity_anlys_2yr_int_lag_full_table)
## coef.lagged_analysis_model.
## (Intercept) -9.78710373
## StateAlaska 0.25891518
## StateArizona 0.30958384
## StateArkansas -0.38223314
## StateCalifornia -0.15314559
## StateColorado 0.08682166
#give the single coefficient column a descriptive name
colnames(sensitivity_anlys_2yr_int_lag_full_table) <- "Coefficient_Estimate"
#policy covariates of interest; used below to distinguish them from the state
#fixed effects and the smoothed time terms
covariates <- c("Naloxone_Pharmacy_Yes_Redefined",
                "Naloxone_Pharmacy_No_Redefined",
                "Medical_Marijuana_Redefined",
                "Recreational_Marijuana_Redefined",
                "GSL_Redefined",
                "PDMP_Redefined",
                "Medicaid_Expansion_Redefined",
                "int_2_yr_effect_lag",
                "num_states_w_intervention_2yr_lag")
#rename the regression output rows so they read cleanly.
#Three kinds of coefficients appear: state fixed effects ("State<Name>"), the
#policy covariates (left untouched), and the smoothed regional time effects
#("s(Time_Period_ID):as.factor(Region)<Region>.<k>").
#Vectorized replacement of the original element-by-element loop; the covariate
#rows match none of the patterns below, so they pass through unchanged.
row_labels <- rownames(sensitivity_anlys_2yr_int_lag_full_table)
#state effects look like "StateMassachusetts": drop the leading "State"
is_state <- startsWith(row_labels, "State")
row_labels[is_state] <- substring(row_labels[is_state], 6)
#the intercept is the baseline state's effect, so label it accordingly
row_labels[row_labels == "(Intercept)"] <- "Intercept/Alabama"
#smoothed time effects: swap the "s(...):as.factor(Region)" prefix for
#readable text, keeping the region name and basis index that follow it
smooth_prefix <- "s(Time_Period_ID):as.factor(Region)"
is_smooth <- startsWith(row_labels, smooth_prefix)
row_labels[is_smooth] <- paste0("Smoothed Time for Region ",
                                substring(row_labels[is_smooth], nchar(smooth_prefix) + 1))
rownames(sensitivity_anlys_2yr_int_lag_full_table) <- row_labels
#confidence intervals, odds ratios, standard errors and p-values per coefficient.
#summary() on a fitted gam is expensive, so compute it once and reuse it
#(the original re-ran summary(lagged_analysis_model) for every quantity)
model_summary <- summary(lagged_analysis_model)
coef_se <- model_summary$se
#Wald 95% confidence intervals on the logit scale
sensitivity_anlys_2yr_int_lag_full_table$Coefficient_Lower_Bound<-sensitivity_anlys_2yr_int_lag_full_table$Coefficient_Estimate - 1.96*coef_se
sensitivity_anlys_2yr_int_lag_full_table$Coefficient_Upper_Bound<-sensitivity_anlys_2yr_int_lag_full_table$Coefficient_Estimate + 1.96*coef_se
#exponentiate estimates and CI bounds to express them on the odds-ratio scale
sensitivity_anlys_2yr_int_lag_full_table$Odds_Ratio<-exp(sensitivity_anlys_2yr_int_lag_full_table$Coefficient_Estimate)
sensitivity_anlys_2yr_int_lag_full_table$Odds_Ratio_LB<-exp(sensitivity_anlys_2yr_int_lag_full_table$Coefficient_Lower_Bound)
sensitivity_anlys_2yr_int_lag_full_table$Odds_Ratio_UB<-exp(sensitivity_anlys_2yr_int_lag_full_table$Coefficient_Upper_Bound)
#store the standard error
sensitivity_anlys_2yr_int_lag_full_table$Standard_Error<-coef_se
#p-values exist only for the parametric terms, so pad the smoothed-time rows with NA
sensitivity_anlys_2yr_int_lag_full_table$p_value<-c(model_summary$p.pv,
                                                    rep(NA, length(coef(lagged_analysis_model)) - length(model_summary$p.pv)))
head(sensitivity_anlys_2yr_int_lag_full_table)
## Coefficient_Estimate Coefficient_Lower_Bound
## Intercept/Alabama -9.78710373 -9.84541050
## Alaska 0.25891518 0.20497761
## Arizona 0.30958384 0.28228183
## Arkansas -0.38223314 -0.42101146
## California -0.15314559 -0.17910494
## Colorado 0.08682166 0.05546585
## Coefficient_Upper_Bound Odds_Ratio Odds_Ratio_LB
## Intercept/Alabama -9.7287970 5.617135e-05 5.298983e-05
## Alaska 0.3128528 1.295524e+00 1.227498e+00
## Arizona 0.3368859 1.362858e+00 1.326152e+00
## Arkansas -0.3434548 6.823360e-01 6.563826e-01
## California -0.1271862 8.580048e-01 8.360182e-01
## Colorado 0.1181775 1.090702e+00 1.057033e+00
## Odds_Ratio_UB Standard_Error p_value
## Intercept/Alabama 5.954389e-05 0.02974835 0.000000e+00
## Alaska 1.367320e+00 0.02751917 5.030825e-21
## Arizona 1.400579e+00 0.01392960 1.973535e-109
## Arkansas 7.093155e-01 0.01978486 3.683783e-83
## California 8.805697e-01 0.01324457 6.352769e-31
## Colorado 1.125444e+00 0.01599786 5.728374e-08
tail(sensitivity_anlys_2yr_int_lag_full_table)
## Coefficient_Estimate Coefficient_Lower_Bound
## Smoothed Time for Region West.4 0.2590000 0.2386586
## Smoothed Time for Region West.5 0.2418414 0.2090413
## Smoothed Time for Region West.6 0.2408127 0.1988066
## Smoothed Time for Region West.7 0.3008425 0.2471347
## Smoothed Time for Region West.8 0.4279615 0.3621048
## Smoothed Time for Region West.9 0.5172422 0.4535700
## Coefficient_Upper_Bound Odds_Ratio
## Smoothed Time for Region West.4 0.2793414 1.295634
## Smoothed Time for Region West.5 0.2746415 1.273592
## Smoothed Time for Region West.6 0.2828187 1.272283
## Smoothed Time for Region West.7 0.3545502 1.350997
## Smoothed Time for Region West.8 0.4938181 1.534127
## Smoothed Time for Region West.9 0.5809143 1.677395
## Odds_Ratio_LB Odds_Ratio_UB Standard_Error
## Smoothed Time for Region West.4 1.269545 1.322259 0.01037827
## Smoothed Time for Region West.5 1.232496 1.316059 0.01673476
## Smoothed Time for Region West.6 1.219946 1.326865 0.02143167
## Smoothed Time for Region West.7 1.280352 1.425539 0.02740191
## Smoothed Time for Region West.8 1.436349 1.638560 0.03360033
## Smoothed Time for Region West.9 1.573921 1.787672 0.03248578
## p_value
## Smoothed Time for Region West.4 NA
## Smoothed Time for Region West.5 NA
## Smoothed Time for Region West.6 NA
## Smoothed Time for Region West.7 NA
## Smoothed Time for Region West.8 NA
## Smoothed Time for Region West.9 NA
#save the table into a CSV
# write.csv(round(sensitivity_anlys_2yr_int_lag_full_table,5), "./Data/coefficients_GAM_9_6_21_lagged_2yr_int.csv")
#export a table with just the covariates
#locate the rows of the full table that hold the covariate coefficients
covariate_Index <- which(rownames(sensitivity_anlys_2yr_int_lag_full_table) %in% covariates)
#round the covariate rows for presentation
sens_analysis_2yr_int_lag_covariate_table <- round(sensitivity_anlys_2yr_int_lag_full_table[covariate_Index, ], 5)
#give the covariates cleaner display names
rownames(sens_analysis_2yr_int_lag_covariate_table) <- c("Naloxone_Pharmacy_Yes",
                                                         "Naloxone_Pharmacy_No",
                                                         "Medical_Marijuana",
                                                         "Recreational_Marijuana",
                                                         "GSL",
                                                         "PDMP",
                                                         "Medicaid_Expansion",
                                                         "Intervention",
                                                         "Number of States w Intervention")
#reorganize so the covariates sit on top and the remaining variables are below
sens_analysis_2yr_int_lag_covariate_table <- rbind(sens_analysis_2yr_int_lag_covariate_table,
                                                   sensitivity_anlys_2yr_int_lag_full_table[-covariate_Index, ])
#keep only the odds-ratio-scale columns plus the p-value
cols_to_drop <- c("Coefficient_Estimate", "Coefficient_Lower_Bound",
                  "Coefficient_Upper_Bound", "Standard_Error")
sens_analysis_2yr_int_lag_covariate_table <- sens_analysis_2yr_int_lag_covariate_table[, !(colnames(sens_analysis_2yr_int_lag_covariate_table) %in% cols_to_drop)]
colnames(sens_analysis_2yr_int_lag_covariate_table)<-c("Risk_Ratio_Estimates", "RR_95_CI_LB", "RR_95_CI_UB", "p-value")
head(sens_analysis_2yr_int_lag_covariate_table, 10)
## Risk_Ratio_Estimates RR_95_CI_LB RR_95_CI_UB
## Naloxone_Pharmacy_Yes 9.729600e-01 9.582600e-01 9.878900e-01
## Naloxone_Pharmacy_No 1.005420e+00 9.926400e-01 1.018360e+00
## Medical_Marijuana 1.067910e+00 1.056120e+00 1.079830e+00
## Recreational_Marijuana 9.615700e-01 9.454500e-01 9.779600e-01
## GSL 1.034410e+00 1.023050e+00 1.045880e+00
## PDMP 9.823800e-01 9.712700e-01 9.936200e-01
## Medicaid_Expansion 1.107430e+00 1.094400e+00 1.120620e+00
## Intervention 1.036340e+00 1.026950e+00 1.045800e+00
## Number of States w Intervention 1.000110e+00 9.978100e-01 1.002420e+00
## Intercept/Alabama 5.617135e-05 5.298983e-05 5.954389e-05
## p-value
## Naloxone_Pharmacy_Yes 0.00042
## Naloxone_Pharmacy_No 0.40772
## Medical_Marijuana 0.00000
## Recreational_Marijuana 0.00001
## GSL 0.00000
## PDMP 0.00219
## Medicaid_Expansion 0.00000
## Intervention 0.00000
## Number of States w Intervention 0.92543
## Intercept/Alabama 0.00000
#save the table into a CSV
# write.csv(round(sens_analysis_2yr_int_lag_covariate_table, 3), "./Data/coefficients_covariates_9_6_21_2_yr_int_lag.csv")
Attributable Deaths
################# Sensitivity Analysis 5: Number of Attributable Deaths #################
#find the number of deaths attributable to the intervention
#restrict to state-periods in which the lagged intervention exposure is positive
attr_deaths_anlys_2yr_int_lag<-sensitivity_anlys_2yr_int_lag[which(sensitivity_anlys_2yr_int_lag$int_2_yr_effect_lag>0),]
#observed log-odds of overdose death in each retained state-period; shared by
#the point estimate and both CI-bound counterfactuals below
obs_logit <- logit(attr_deaths_anlys_2yr_int_lag$imputed_deaths/attr_deaths_anlys_2yr_int_lag$population)
#counterfactual probability of overdose death had the intervention not occurred:
#remove the intervention coefficient on the logit scale, then back-transform
prob_od_no_int_2yr_int_lag <- expit(obs_logit - coef(lagged_analysis_model)["int_2_yr_effect_lag"])
#Wald 95% CI bounds for every model coefficient
coef_lb<-coef(lagged_analysis_model) - 1.96*summary(lagged_analysis_model)$se
coef_ub<-coef(lagged_analysis_model) + 1.96*summary(lagged_analysis_model)$se
#counterfactual probabilities under the lower/upper bound of the intervention
#coefficient (these bound the attributable-death estimates below)
prob_od_no_int_LB_2yr_int_lag <- expit(obs_logit - coef_lb["int_2_yr_effect_lag"])
prob_od_no_int_UB_2yr_int_lag <- expit(obs_logit - coef_ub["int_2_yr_effect_lag"])
#estimate the number of deaths attributable to the intervention, one value per
#time period: observed deaths minus expected deaths under the counterfactual
#(no-intervention) probability, summed over the states in that period
period_ids <- sort(unique(attr_deaths_anlys_2yr_int_lag$Time_Period_ID))
#helper: total attributable deaths in one period for a given counterfactual
#probability vector
sum_attr_deaths <- function(time, prob_no_int){
  rows <- which(attr_deaths_anlys_2yr_int_lag$Time_Period_ID == time)
  sum(attr_deaths_anlys_2yr_int_lag$imputed_deaths[rows]
      - prob_no_int[rows]*attr_deaths_anlys_2yr_int_lag$population[rows])
}
#point estimate and the CI bounds (vapply replaces the original counter loop)
num_attr_od <- vapply(period_ids, sum_attr_deaths, numeric(1),
                      prob_no_int = prob_od_no_int_2yr_int_lag)
num_attr_od_LB <- vapply(period_ids, sum_attr_deaths, numeric(1),
                         prob_no_int = prob_od_no_int_LB_2yr_int_lag)
num_attr_od_UB <- vapply(period_ids, sum_attr_deaths, numeric(1),
                         prob_no_int = prob_od_no_int_UB_2yr_int_lag)
#assemble the per-period attributable-death estimates with their CI bounds
num_attr_od_2yr_int_lag<-data.frame("Time_Period_ID" = sort(unique(attr_deaths_anlys_2yr_int_lag$Time_Period_ID)),
#NOTE(review): assumes the sorted unique start dates align one-to-one with the
#sorted period IDs -- TODO confirm a period ID never maps to two start dates
"Time_Start" = sort(unique(attr_deaths_anlys_2yr_int_lag$Time_Period_Start)),
"Num_Attr_Deaths" = num_attr_od,
"Num_Attr_Deaths_LB" = num_attr_od_LB,
"Num_Attr_Deaths_UB" = num_attr_od_UB)
#sum up the total number of excess deaths attributable to the intervention
sum(num_attr_od_2yr_int_lag$Num_Attr_Deaths)
## [1] 17799.01
summary(num_attr_od_2yr_int_lag$Num_Attr_Deaths)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 11.61 165.88 389.67 456.38 654.51 1106.00
#coerce the start dates to Date class for the plots and year() grouping below
num_attr_od_2yr_int_lag$Time_Start<-as.Date(num_attr_od_2yr_int_lag$Time_Start)
#compute the 95% CI for the total
sum(num_attr_od_2yr_int_lag$Num_Attr_Deaths_LB)
## [1] 13323.73
sum(num_attr_od_2yr_int_lag$Num_Attr_Deaths_UB)
## [1] 22233.78
#aggregate attributable deaths (and the CI bounds) from half-year periods to
#calendar years
yearly_num_Attr_Deaths_2yr_int_lag <- num_attr_od_2yr_int_lag %>%
  group_by(year = year(Time_Start)) %>%
  summarise(deaths = sum(Num_Attr_Deaths),
            death_lb = sum(Num_Attr_Deaths_LB),
            death_ub = sum(Num_Attr_Deaths_UB))
#distribution of the yearly point estimates and their CI bounds
summary(yearly_num_Attr_Deaths_2yr_int_lag$deaths)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 11.61 305.94 746.89 889.95 1264.13 2107.53
summary(yearly_num_Attr_Deaths_2yr_int_lag$death_lb)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.687 229.016 559.093 666.187 946.285 1577.626
summary(yearly_num_Attr_Deaths_2yr_int_lag$death_ub)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 14.5 382.2 933.0 1111.7 1579.1 2632.6
#quick visual check of the yearly trend in attributable deaths
ggplot(yearly_num_Attr_Deaths_2yr_int_lag, aes(x = year, y = deaths)) + geom_line() + geom_point()

Compile Attributable Deaths Results
########################## Compiled Attributable Deaths Plot###############################
#tag each analysis dataset with a group code used for the color/shape legend:
# a = main analysis, b = all overdose deaths,
# c = excluding high-missingness states, d = 2-year lagged effect
yearly_num_Attr_Deaths_main_analysis$gp_type <- "a"
yearly_num_Attr_Deaths_exclude_states$gp_type <- "c"
yearly_num_Attr_Deaths_od_all$gp_type <- "b"
yearly_num_Attr_Deaths_redefine_int$gp_type <- "d"
#open a PDF device; the combined figure overlays the attributable-death series
#from the four analyses, each with its estimate (solid) and 95% CI (dashed)
pdf("Figures/num_attr_deaths_yearly_for_all_anlys_9_6_21_all_od_shape_alpha.pdf")
ggplot(yearly_num_Attr_Deaths_main_analysis) +
#main analysis (gp_type "a"): estimate line, points, and CI bound lines
geom_line(aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1, color = gp_type,
linetype = "Estimate")) +
geom_point(aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1, color = gp_type, shape = gp_type)) +
geom_line(yearly_num_Attr_Deaths_main_analysis,
mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
color = gp_type, linetype = "95% Confidence Interval")) +
geom_line(yearly_num_Attr_Deaths_main_analysis,
mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
color =gp_type,linetype = "95% Confidence Interval")) +
# geom_point(yearly_num_Attr_Deaths_main_analysis,
# mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
# color = "a")) +
# geom_point(yearly_num_Attr_Deaths_main_analysis,
# mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
# color ="a")) +
#redefined-intervention sensitivity analysis (gp_type "d")
geom_line(yearly_num_Attr_Deaths_redefine_int,
mapping = aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1,
color = gp_type, linetype = "Estimate")) +
geom_point(yearly_num_Attr_Deaths_redefine_int,
mapping = aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1,
color = gp_type, shape = gp_type)) +
geom_line(yearly_num_Attr_Deaths_redefine_int,
mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
color = gp_type, linetype = "95% Confidence Interval")) +
geom_line(yearly_num_Attr_Deaths_redefine_int,
mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
color = gp_type, linetype = "95% Confidence Interval")) +
# geom_point(yearly_num_Attr_Deaths_redefine_int,
# mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
# color = "d")) +
# geom_point(yearly_num_Attr_Deaths_redefine_int,
# mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
# color = "d")) +
#all-overdose-deaths sensitivity analysis (gp_type "b")
geom_line(yearly_num_Attr_Deaths_od_all, mapping = aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1,
color = gp_type, linetype = "Estimate"), alpha = 1) +
geom_point(yearly_num_Attr_Deaths_od_all, mapping = aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1,
color = gp_type, shape = gp_type), alpha = 1) +
geom_line(yearly_num_Attr_Deaths_od_all,
mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
color = gp_type, linetype = "95% Confidence Interval"), alpha = 1) +
geom_line(yearly_num_Attr_Deaths_od_all,
mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
color = gp_type, linetype = "95% Confidence Interval"), alpha = 1) +
# geom_point(yearly_num_Attr_Deaths_od_all,
# mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
# color = "b"), alpha = 0.5) +
# geom_point(yearly_num_Attr_Deaths_od_all,
# mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
# color = "b") , alpha = 0.5) +
#excluding-states sensitivity analysis (gp_type "c")
geom_line(yearly_num_Attr_Deaths_exclude_states,
mapping = aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1,
color = gp_type, linetype = "Estimate"), alpha = 1) +
geom_point(yearly_num_Attr_Deaths_exclude_states, mapping = aes(x = as.Date(as.yearmon(year)), y = deaths, group = 1,
color = gp_type, shape = gp_type), alpha = 1) +
geom_line(yearly_num_Attr_Deaths_exclude_states,
mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
color = gp_type, linetype = "95% Confidence Interval"),alpha = 1) +
geom_line(yearly_num_Attr_Deaths_exclude_states,
mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
color = gp_type, linetype = "95% Confidence Interval"), alpha = 1) +
# geom_point(yearly_num_Attr_Deaths_exclude_states,
# mapping = aes(x = as.Date(as.yearmon(year)), y = death_ub, group = 1,
# color = "c"),alpha = 0.5) +
# geom_point(yearly_num_Attr_Deaths_exclude_states,
# mapping = aes(x = as.Date(as.yearmon(year)), y = death_lb, group = 1,
# color = "c"), alpha = 0.5) +
#publication theme: Times font, no gridlines, legend inside the panel
theme(axis.text.y=element_text(size=10, family = "Times"),
axis.title=element_text(size=10,face="bold", family = "Times"),
panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
axis.text.x = element_text(size = 10, family = "Times"),
panel.background = element_rect("white"),
legend.text=element_text(size=9, family = "Times"),
legend.position = c(.4,.85),
# legend.position = "bottom",
legend.box="vertical", legend.margin=margin()) +
guides(color=guide_legend(nrow=2,byrow=TRUE)) +
labs(x = "Year", y = "Yearly Drug Overdose Deaths Attributable to DIH Prosecutions Reported in Media", color = "",
linetype = "", shape = "") +
#map group codes a/b/c/d to colors and human-readable legend labels
scale_color_manual(values=c('black', 'red', 'green', 'deepskyblue'),
labels = c("Main Analysis: Unintentional Drug Overdose Deaths", "All Drug Overdose Deaths",
"Excluding States with At Least 75% Missing Monthly", "2 Year Effect")) +
scale_x_date(date_labels="%Y", breaks = seq(as.Date("2000-01-01"), as.Date("2018-01-01"), by = "2 years")) +
scale_linetype_manual(values = c("Estimate" = "solid", "95% Confidence Interval" = "dashed"),
breaks = c("Estimate", "95% Confidence Interval")) +
guides(linetype = guide_legend(nrow = 1)) +
#shapes parallel the color mapping so series are distinguishable in grayscale
scale_shape_manual(values = c(16, 4, 5, 2),
labels = c("Main Analysis: Unintentional Drug Overdose Deaths", "All Drug Overdose Deaths",
"Excluding States with At Least 75% Missing Monthly", "2 Year Effect"))
dev.off()
## quartz_off_screen
## 2
Sensitivity Analysis 10: Including Unemployment Rates and Age Groups
Clean Data
#create age groups using the population dataset
#state population counts by single year of age, 2000-2010 intercensal vintage
pop_by_age_2010<- read.csv("./Data/pop_by_age_state_2000_2010.csv")
#inspect the raw structure (REGION/DIVISION/STATE codes, SEX, AGE, yearly estimates)
head(pop_by_age_2010)
## REGION DIVISION STATE NAME SEX AGE ESTIMATESBASE2000 POPESTIMATE2000
## 1 0 0 0 United States 0 0 3805718 3855956
## 2 0 0 0 United States 0 1 3820647 3798691
## 3 0 0 0 United States 0 2 3790534 3800144
## 4 0 0 0 United States 0 3 3832855 3821118
## 5 0 0 0 United States 0 4 3926400 3902384
## 6 0 0 0 United States 0 5 3965175 3967834
## POPESTIMATE2001 POPESTIMATE2002 POPESTIMATE2003 POPESTIMATE2004
## 1 4012658 3951461 3975871 4014258
## 2 3855407 4004674 3936139 3953063
## 3 3800096 3856114 4002836 3933735
## 4 3802710 3804336 3860727 4008220
## 5 3827346 3812607 3816873 3876609
## 6 3910033 3837187 3823568 3829607
## POPESTIMATE2005 POPESTIMATE2006 POPESTIMATE2007 POPESTIMATE2008
## 1 4004393 4041738 4147997 4132735
## 2 3987032 3972124 4002215 4100756
## 3 3952632 3988119 3973479 4004146
## 4 3943215 3966022 4004011 3992320
## 5 4030128 3970880 3998260 4041170
## 6 3893128 4050582 3993489 4024297
## POPESTIMATE2009 CENSUS2010POP POPESTIMATE2010
## 1 4003587 3944153 3952444
## 2 4078797 3978070 3951024
## 3 4103002 4096929 4087074
## 4 4025675 4119040 4133855
## 5 4033457 4063170 4076132
## 6 4070265 4056858 4069577
#base the age groups on abouk, powell, pacula
#adult age groups actually constructed: 18-24, 25-34, 35-44, 45-64, 65+
pop_by_age_2010_group <- pop_by_age_2010 %>%
  #AGE == 999 rows are totals; keep single ages 18 and up, matching how the
  #DIH prosecution data were filtered (cases involving minors excluded)
  filter(AGE < 999, AGE >= 18) %>%
  #bucket single years of age into groups (ages are already >= 18 here)
  mutate(age_groups = case_when(AGE <= 24 ~ "18_24",
                                AGE <= 34 ~ "25_34",
                                AGE <= 44 ~ "35_44",
                                AGE <= 64 ~ "45_64",
                                TRUE ~ "65_plus")) %>%
  #drop the national rows; SEX == 0 keeps the both-sexes records
  filter(NAME != "United States", SEX == 0) %>%
  group_by(NAME, age_groups) %>%
  summarise(pop_2000 = sum(ESTIMATESBASE2000),
            pop_2001 = sum(POPESTIMATE2001),
            pop_2002 = sum(POPESTIMATE2002),
            pop_2003 = sum(POPESTIMATE2003),
            pop_2004 = sum(POPESTIMATE2004),
            pop_2005 = sum(POPESTIMATE2005),
            pop_2006 = sum(POPESTIMATE2006),
            pop_2007 = sum(POPESTIMATE2007),
            pop_2008 = sum(POPESTIMATE2008),
            pop_2009 = sum(POPESTIMATE2009),
            pop_2010 = sum(CENSUS2010POP))
#also make distribution of each age group
#prop: normalize a numeric vector so its elements sum to 1.
#Within each state, this turns the yearly age-group counts into the age
#group's share of that state's adult population.
prop <- function(x){x/sum(x)}
pop_by_age_2010_group <- pop_by_age_2010_group %>%
  group_by(NAME) %>%
  #across(where(is.numeric), ...) replaces the superseded mutate_if();
  #grouping columns are excluded automatically, as mutate_if did
  mutate(across(where(is.numeric), prop))
#state population counts by single year of age, 2010-2019 vintage (civilian estimates)
pop_by_age_2019 <- read.csv("./Data/pop_by_age_state_2010_2019.csv")
#inspect the raw structure
head(pop_by_age_2019)
## SUMLEV REGION DIVISION STATE NAME SEX AGE ESTBASE2010_CIV
## 1 10 0 0 0 United States 0 0 3944160
## 2 10 0 0 0 United States 0 1 3978090
## 3 10 0 0 0 United States 0 2 4096939
## 4 10 0 0 0 United States 0 3 4119051
## 5 10 0 0 0 United States 0 4 4063186
## 6 10 0 0 0 United States 0 5 4056872
## POPEST2010_CIV POPEST2011_CIV POPEST2012_CIV POPEST2013_CIV POPEST2014_CIV
## 1 3951430 3963092 3926570 3931258 3954787
## 2 3957730 3966225 3977549 3942698 3948891
## 3 4090621 3970654 3978925 3991740 3958711
## 4 4111688 4101644 3981531 3991017 4005928
## 5 4077346 4121488 4111490 3992502 4004032
## 6 4064521 4087054 4131049 4121876 4004576
## POPEST2015_CIV POPEST2016_CIV POPEST2017_CIV POPEST2018_CIV POPEST2019_CIV
## 1 3983981 3954773 3893990 3815343 3783052
## 2 3973133 4002903 3972711 3908830 3829599
## 3 3966321 3991349 4020045 3987032 3922044
## 4 3974351 3982984 4006946 4033038 3998665
## 5 4020292 3989750 3997280 4018719 4043323
## 6 4017589 4035033 4003452 4008443 4028281
#repeat the age-group construction for the 2011-2019 civilian estimates
pop_by_age_2019_group <- pop_by_age_2019 %>%
  #AGE == 999 rows are totals; keep single ages 18 and up
  filter(AGE < 999, AGE >= 18) %>%
  #bucket single years of age into groups (ages are already >= 18 here)
  mutate(age_groups = case_when(AGE <= 24 ~ "18_24",
                                AGE <= 34 ~ "25_34",
                                AGE <= 44 ~ "35_44",
                                AGE <= 64 ~ "45_64",
                                TRUE ~ "65_plus")) %>%
  #drop the national rows; SEX == 0 keeps the both-sexes records
  filter(NAME != "United States", SEX == 0) %>%
  group_by(NAME, age_groups) %>%
  summarise(pop_2011 = sum(POPEST2011_CIV),
            pop_2012 = sum(POPEST2012_CIV),
            pop_2013 = sum(POPEST2013_CIV),
            pop_2014 = sum(POPEST2014_CIV),
            pop_2015 = sum(POPEST2015_CIV),
            pop_2016 = sum(POPEST2016_CIV),
            pop_2017 = sum(POPEST2017_CIV),
            pop_2018 = sum(POPEST2018_CIV),
            pop_2019 = sum(POPEST2019_CIV))
#convert counts to within-state age-group proportions for each year
pop_by_age_2019_group <- pop_by_age_2019_group %>%
  group_by(NAME) %>%
  mutate_if(is.numeric, prop)
#combine the population and unemployment data to the main analysis dataset
#inner join of the 2000-2010 and 2011-2019 share tables on state + age group;
#merge() sorts the result by the join keys
population_prop_data <- merge(pop_by_age_2010_group, pop_by_age_2019_group, by = c("NAME", "age_groups"))
#change the population and unemployment dataset from wide to long
population_prop_data_long <- population_prop_data %>%
pivot_longer(
#change the cols that start with "pop"
cols = starts_with("pop"),
#put the column names into a col named "year"
names_to = "year",
#put the values into a col names "population_prop"
values_to = "population_prop",
#delete the "pop_" from the column names
names_prefix = "pop_"
)
#now we want to pivot the table to wider so that each age_group population share is a column
#NOTE(review): after names_prefix stripping, "year" is a CHARACTER column
#("2000".."2019"); the later merge() against the numeric year column appears
#to rely on coercion during key matching -- verify if the merge keys change
population_prop_data_final <- population_prop_data_long %>%
pivot_wider(
#each row identified by state and year
id_cols = c("NAME", "year"),
#take the column names from the age groups
names_from = age_groups,
#take the values from the population proportion column
values_from = population_prop,
#add "pop_prop_" to the col names
names_prefix = "pop_prop_"
)
#skip the first three rows because that is the header
#the series ID corresponds to the different states in order, so have to rename the IDs -- checked this manually 11/29/21
#we manually retrieved the data from BLS to get seasonally adjusted unemployment rates for each state
unemployment_rate <- readxl::read_excel("./Data/bls_unemployment_rate_2000_2019.xlsx", skip = 3)
#NOTE(review): this assumes unique(main_analysis_data$State) returns states in
#the same order as the BLS series IDs -- confirmed manually per the note
#above, but fragile if main_analysis_data's sort order ever changes
unemployment_rate <- unemployment_rate %>%
  mutate(State = unique(main_analysis_data$State))
#change dataset from wide to long
unemployment_rate_long <- unemployment_rate %>%
  pivot_longer(
    #pivot the numeric month columns; the bare predicate `cols = is.numeric`
    #is deprecated in tidyselect and must be wrapped in where()
    cols = where(is.numeric),
    #add column for the date
    names_to = "time",
    #add column where values are the unemployment rate
    values_to = "unemployment_rate"
  ) %>%
  #remove the columns which are not needed or are all NA
  #(the all-NA Nov/Dec 2021 columns are read as logical, so they are not
  #caught by where(is.numeric) and survive the pivot)
  dplyr::select(-`Series ID`,
                -`Nov\n2021`,
                -`Dec\n2021`) %>%
  #change date to date object
  mutate(time = mdy(time)) %>%
  #filter to only keep date up to 2019
  filter(year(time) <= 2019)
#since we need 6 month groupings, we take the average per 6 month and state
unemployment_rate_summary <- unemployment_rate_long %>%
#compute the six month period for each date
#(floor_date maps each month to the Jan-1 or Jul-1 of its half-year)
mutate(six_month_pd = floor_date(time, unit = "6 months")) %>%
#group by state and six month period
group_by(State, six_month_pd) %>%
#compute the mean unemployment rate, divided by 100 so it is a proportion;
#summarise() drops the last grouping level, leaving the result grouped by State
summarise(mean_unemployment = mean(unemployment_rate/100))
#finally merge the two datasets into the main analysis dataset
#first create a temporary main analysis data with a year column
main_analysis_data_w_year <- main_analysis_data %>%
  mutate(year = year(Time_Period_Start))
#merge the age group proportion data (inner join on state + year)
sensitivity_anlys_age_gp_unemp <- merge(main_analysis_data_w_year, population_prop_data_final,
                                        by.x = c("State", "year"), by.y = c("NAME", "year"))
#merge the unemployment rate (inner join on state + six-month period start)
sensitivity_anlys_age_gp_unemp <- merge(sensitivity_anlys_age_gp_unemp, unemployment_rate_summary,
                                        by.x = c("State", "Time_Period_Start"), by.y = c("State", "six_month_pd"))
#remove the temporary year column; the no-op
#mutate(mean_unemployment = mean_unemployment) that used to follow has been
#dropped -- it assigned a column to itself and changed nothing
sensitivity_anlys_age_gp_unemp <- sensitivity_anlys_age_gp_unemp %>%
  dplyr::select(-year)
Quick EDA
#do an EDA to check drug overdoses by age group and unemployment rate
#overdose death rate against each age-group population share, one panel per
#state; each age group drawn as its own colored line
ggplot(data = sensitivity_anlys_age_gp_unemp) +
  geom_line(aes(x = pop_prop_18_24, y = imputed_deaths / population, color = "18_24")) +
  geom_line(aes(x = pop_prop_25_34, y = imputed_deaths / population, color = "25_34")) +
  geom_line(aes(x = pop_prop_35_44, y = imputed_deaths / population, color = "35_44")) +
  geom_line(aes(x = pop_prop_45_64, y = imputed_deaths / population, color = "45_64")) +
  geom_line(aes(x = pop_prop_65_plus, y = imputed_deaths / population, color = "65+")) +
  facet_wrap(~State)

#overdose death rate (scaled by 100) and mean unemployment rate over time,
#one panel per state
ggplot(data = sensitivity_anlys_age_gp_unemp) +
  geom_line(aes(x = Time_Period_Start, y = (imputed_deaths / population) * 100,
                color = "od_rate", group = 1)) +
  geom_line(aes(x = Time_Period_Start, y = mean_unemployment,
                color = "unemp", group = 1)) +
  facet_wrap(~State)

############################## Run Model with Spline Time Effects by Region ###############################
#model that we will be using for the main analysis
#cr is used for cubic regression spline -- we are smoothing time effects by region
#run the analysis for all the states
#binomial GAM on cbind(deaths, survivors): state fixed effects, a cubic
#regression spline over time fit separately for each Census region, plus the
#policy, intervention, unemployment, and age-share covariates
#pop_prop_18_24 is the omitted reference share -- the five age-group shares
#sum to 1 within a state-year (see prop() above), so including all five would
#be collinear
#NOTE(review): the covariates below must stay LAST in the formula and in this
#order -- the sandwich-estimator section further down uses tail() on the
#coefficient vector and assumes this ordering
sensitivity_anlys_age_unemp_model<-gam(cbind(round(imputed_deaths), round(num_alive))~ State +
s(Time_Period_ID, bs = "cr", by = as.factor(Region)) +
Naloxone_Pharmacy_Yes_Redefined +
Naloxone_Pharmacy_No_Redefined +
Medical_Marijuana_Redefined +
Recreational_Marijuana_Redefined +
GSL_Redefined +
PDMP_Redefined +
Medicaid_Expansion_Redefined +
Intervention_Redefined +
num_states_w_intervention +
mean_unemployment +
pop_prop_25_34 +
pop_prop_35_44 +
pop_prop_45_64 +
pop_prop_65_plus,
data = sensitivity_anlys_age_gp_unemp,
family = "binomial")
# summary(sensitivity_anlys_age_unemp_model)
#summary output of the model
stargazer(sensitivity_anlys_age_unemp_model, type = "html", dep.var.labels = c("Unintentional Overdose Deaths"))
|
|
|
|
Dependent variable:
|
|
|
|
|
|
Unintentional Overdose Deaths
|
|
|
|
StateAlaska
|
0.670***
|
|
|
(0.040)
|
|
|
|
|
StateArizona
|
0.353***
|
|
|
(0.017)
|
|
|
|
|
StateArkansas
|
-0.414***
|
|
|
(0.020)
|
|
|
|
|
StateCalifornia
|
0.125***
|
|
|
(0.021)
|
|
|
|
|
StateColorado
|
0.371***
|
|
|
(0.023)
|
|
|
|
|
StateConnecticut
|
0.149***
|
|
|
(0.018)
|
|
|
|
|
StateDelaware
|
0.345***
|
|
|
(0.023)
|
|
|
|
|
StateFlorida
|
0.035*
|
|
|
(0.021)
|
|
|
|
|
StateGeorgia
|
0.211***
|
|
|
(0.020)
|
|
|
|
|
StateHawaii
|
-0.299***
|
|
|
(0.027)
|
|
|
|
|
StateIdaho
|
-0.047*
|
|
|
(0.025)
|
|
|
|
|
StateIllinois
|
0.128***
|
|
|
(0.015)
|
|
|
|
|
StateIndiana
|
0.179***
|
|
|
(0.015)
|
|
|
|
|
StateIowa
|
-0.803***
|
|
|
(0.023)
|
|
|
|
|
StateKansas
|
-0.293***
|
|
|
(0.021)
|
|
|
|
|
StateKentucky
|
0.691***
|
|
|
(0.014)
|
|
|
|
|
StateLouisiana
|
0.406***
|
|
|
(0.017)
|
|
|
|
|
StateMaine
|
-0.115***
|
|
|
(0.031)
|
|
|
|
|
StateMaryland
|
-0.950***
|
|
|
(0.022)
|
|
|
|
|
StateMassachusetts
|
0.267***
|
|
|
(0.014)
|
|
|
|
|
StateMichigan
|
-0.028*
|
|
|
(0.014)
|
|
|
|
|
StateMinnesota
|
-0.540***
|
|
|
(0.019)
|
|
|
|
|
StateMississippi
|
-0.019
|
|
|
(0.019)
|
|
|
|
|
StateMissouri
|
0.158***
|
|
|
(0.015)
|
|
|
|
|
StateMontana
|
-0.524***
|
|
|
(0.031)
|
|
|
|
|
StateNebraska
|
-0.852***
|
|
|
(0.030)
|
|
|
|
|
StateNevada
|
0.622***
|
|
|
(0.020)
|
|
|
|
|
StateNew Hampshire
|
0.159***
|
|
|
(0.024)
|
|
|
|
|
StateNew Jersey
|
0.159***
|
|
|
(0.018)
|
|
|
|
|
StateNew Mexico
|
0.635***
|
|
|
(0.017)
|
|
|
|
|
StateNew York
|
-0.161***
|
|
|
(0.014)
|
|
|
|
|
StateNorth Carolina
|
0.264***
|
|
|
(0.013)
|
|
|
|
|
StateNorth Dakota
|
-1.017***
|
|
|
(0.050)
|
|
|
|
|
StateOhio
|
0.428***
|
|
|
(0.013)
|
|
|
|
|
StateOklahoma
|
0.425***
|
|
|
(0.017)
|
|
|
|
|
StateOregon
|
-0.178***
|
|
|
(0.018)
|
|
|
|
|
StatePennsylvania
|
0.322***
|
|
|
(0.015)
|
|
|
|
|
StateRhode Island
|
0.232***
|
|
|
(0.022)
|
|
|
|
|
StateSouth Carolina
|
0.208***
|
|
|
(0.015)
|
|
|
|
|
StateSouth Dakota
|
-1.028***
|
|
|
(0.043)
|
|
|
|
|
StateTennessee
|
0.484***
|
|
|
(0.014)
|
|
|
|
|
StateTexas
|
0.146***
|
|
|
(0.024)
|
|
|
|
|
StateUtah
|
0.574***
|
|
|
(0.048)
|
|
|
|
|
StateVermont
|
-0.316***
|
|
|
(0.033)
|
|
|
|
|
StateVirginia
|
0.017
|
|
|
(0.016)
|
|
|
|
|
StateWashington
|
0.233***
|
|
|
(0.018)
|
|
|
|
|
StateWest Virginia
|
0.687***
|
|
|
(0.021)
|
|
|
|
|
StateWisconsin
|
-0.052***
|
|
|
(0.015)
|
|
|
|
|
StateWyoming
|
0.032
|
|
|
(0.034)
|
|
|
|
|
Naloxone_Pharmacy_Yes_Redefined
|
-0.011
|
|
|
(0.008)
|
|
|
|
|
Naloxone_Pharmacy_No_Redefined
|
0.017**
|
|
|
(0.007)
|
|
|
|
|
Medical_Marijuana_Redefined
|
0.063***
|
|
|
(0.006)
|
|
|
|
|
Recreational_Marijuana_Redefined
|
-0.053***
|
|
|
(0.009)
|
|
|
|
|
GSL_Redefined
|
0.013**
|
|
|
(0.006)
|
|
|
|
|
PDMP_Redefined
|
-0.045***
|
|
|
(0.006)
|
|
|
|
|
Medicaid_Expansion_Redefined
|
0.088***
|
|
|
(0.006)
|
|
|
|
|
Intervention_Redefined
|
0.062***
|
|
|
(0.006)
|
|
|
|
|
num_states_w_intervention
|
0.005***
|
|
|
(0.002)
|
|
|
|
|
mean_unemployment
|
-0.636***
|
|
|
(0.182)
|
|
|
|
|
pop_prop_25_34
|
0.698
|
|
|
(0.594)
|
|
|
|
|
pop_prop_35_44
|
-3.139***
|
|
|
(0.588)
|
|
|
|
|
pop_prop_45_64
|
3.153***
|
|
|
(0.694)
|
|
|
|
|
pop_prop_65_plus
|
5.840***
|
|
|
(0.634)
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Midwest.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)Northeast.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)South.9
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.1
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.2
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.3
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.4
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.5
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.6
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.7
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.8
|
|
|
|
|
|
|
|
|
s(Time_Period_ID):as.factor(Region)West.9
|
|
|
|
|
|
|
|
|
Constant
|
-11.637***
|
|
|
(0.500)
|
|
|
|
|
|
|
Observations
|
2,000
|
|
Adjusted R2
|
0.913
|
|
Log Likelihood
|
-16,521.120
|
|
UBRE
|
8.610
|
|
|
|
Note:
|
p<0.1; p<0.05; p<0.01
|
Sandwich Estimator
#here, we estimate the variance-covariance matrix through the sandwich estimator
#we only want to estimate the variances for the policies, intervention variable, and number of states with intervention
#build the covariate matrix -- same columns, same ORDER, as the last 14
#parametric terms of the model formula; tail() below depends on this match
sensitivity_anlys_age_unemp_subset_cov <- sensitivity_anlys_age_gp_unemp %>%
ungroup() %>%
dplyr::select(
Naloxone_Pharmacy_Yes_Redefined,
Naloxone_Pharmacy_No_Redefined,
Medical_Marijuana_Redefined,
Recreational_Marijuana_Redefined,
GSL_Redefined,
PDMP_Redefined,
Medicaid_Expansion_Redefined,
Intervention_Redefined,
num_states_w_intervention,
mean_unemployment,
pop_prop_25_34,
pop_prop_35_44,
pop_prop_45_64,
pop_prop_65_plus)
#estimate the 95% CI and SD
#take the last ncol(cov) parametric coefficients -- these are the selected
#covariates because they appear last in the model formula above
sensitivity_anlys_age_unemp_subset_coefficient_values <- tail(summary(sensitivity_anlys_age_unemp_model)$p.coeff,
ncol(sensitivity_anlys_age_unemp_subset_cov))
#fitted death probabilities on the analysis data (response scale)
sensitivity_anlys_age_unemp_subset_pred_prob <- predict(sensitivity_anlys_age_unemp_model,
newdata = sensitivity_anlys_age_gp_unemp, type = "response")
#compute_sd_and_CI() is defined elsewhere in this project -- presumably
#returns sandwich SEs plus 95% CIs on the log-odds and exponentiated scales
#(see the printed output below); confirm against its definition
sensitivity_anlys_age_unemp_subset_sd_and_ci <- compute_sd_and_CI(sensitivity_anlys_age_unemp_subset_cov,
sensitivity_anlys_age_gp_unemp$population,
sensitivity_anlys_age_gp_unemp$imputed_deaths,
sensitivity_anlys_age_unemp_subset_pred_prob,
sensitivity_anlys_age_unemp_subset_coefficient_values)
sensitivity_anlys_age_unemp_subset_sd_and_ci
## lb_coef coef_values ub_coef
## Naloxone_Pharmacy_Yes_Redefined -0.053662780 -0.011335338 0.030992104
## Naloxone_Pharmacy_No_Redefined -0.011742544 0.016826931 0.045396406
## Medical_Marijuana_Redefined 0.041978748 0.063473833 0.084968919
## Recreational_Marijuana_Redefined -0.085362237 -0.053064003 -0.020765770
## GSL_Redefined -0.023273436 0.012994996 0.049263427
## PDMP_Redefined -0.072403764 -0.044885312 -0.017366860
## Medicaid_Expansion_Redefined 0.061030468 0.087510426 0.113990383
## Intervention_Redefined 0.035468127 0.061621342 0.087774556
## num_states_w_intervention 0.003369738 0.005478243 0.007586749
## mean_unemployment -1.085829563 -0.636266543 -0.186703523
## pop_prop_25_34 -0.352123019 0.697823342 1.747769703
## pop_prop_35_44 -4.417798974 -3.139469107 -1.861139241
## pop_prop_45_64 2.629349243 3.152988396 3.676627548
## pop_prop_65_plus 5.368064774 5.840360001 6.312655229
## exp_lb exp_coef exp_ub
## Naloxone_Pharmacy_Yes_Redefined 0.94775165 0.98872866 1.0314774
## Naloxone_Pharmacy_No_Redefined 0.98832613 1.01696930 1.0464426
## Medical_Marijuana_Redefined 1.04287231 1.06553160 1.0886832
## Recreational_Marijuana_Redefined 0.91817963 0.94831931 0.9794484
## GSL_Redefined 0.97699530 1.01307980 1.0504970
## PDMP_Redefined 0.93015526 0.95610713 0.9827831
## Medicaid_Expansion_Redefined 1.06293130 1.09145364 1.1207413
## Intervention_Redefined 1.03610462 1.06355954 1.0917420
## num_states_w_intervention 1.00337542 1.00549328 1.0076156
## mean_unemployment 0.33762159 0.52926473 0.8296897
## pop_prop_25_34 0.70319361 2.00937422 5.7417825
## pop_prop_35_44 0.01206075 0.04330578 0.1554954
## pop_prop_45_64 13.86474439 23.40590628 39.5129137
## pop_prop_65_plus 214.44746152 343.90312395 551.5073847
## sd_coef
## Naloxone_Pharmacy_Yes_Redefined 0.021595634
## Naloxone_Pharmacy_No_Redefined 0.014576263
## Medical_Marijuana_Redefined 0.010966880
## Recreational_Marijuana_Redefined 0.016478691
## GSL_Redefined 0.018504302
## PDMP_Redefined 0.014040026
## Medicaid_Expansion_Redefined 0.013510183
## Intervention_Redefined 0.013343477
## num_states_w_intervention 0.001075768
## mean_unemployment 0.229368888
## pop_prop_25_34 0.535686919
## pop_prop_35_44 0.652209116
## pop_prop_45_64 0.267162833
## pop_prop_65_plus 0.240966953